From 43cfb183e7e7f5361b1c29c93038262eb9989697 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Sat, 24 Feb 2024 00:16:06 +0100 Subject: [PATCH 01/23] ENH `TransformedTargetRegressor.fit()` raises if only `inverse_func` is provided (#28483) Co-authored-by: Thomas J. Fan --- doc/whats_new/v1.5.rst | 4 ++++ sklearn/compose/_target.py | 20 +++++++++++++++----- sklearn/compose/tests/test_target.py | 10 +++++++++- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 1b84c24a17d43..53f220729f50b 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -74,6 +74,10 @@ Changelog - |Feature| A fitted :class:`compose.ColumnTransformer` now implements `__getitem__` which returns the fitted transformers by name. :pr:`27990` by `Thomas Fan`_. +- |ENH| :class:`compose.TransformedTargetRegressor` now raises an error in `fit` if + only `inverse_func` is provided without `func` (that would default to identity) being + explicitly set as well. :pr:`28483` by :user:`Stefanie Senger `. + :mod:`sklearn.dummy` .................... diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index 0ff84153ca180..3e6c94df8267a 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -75,9 +75,10 @@ class TransformedTargetRegressor( inverse_func : function, default=None Function to apply to the prediction of the regressor. Cannot be set at - the same time as `transformer`. The function needs to return a - 2-dimensional array. The inverse function is used to return - predictions to the same space of the original training labels. + the same time as `transformer`. The inverse function is used to return + predictions to the same space of the original training labels. If + `inverse_func` is set, `func` also needs to be provided. The inverse + function needs to return a 2-dimensional array. check_inverse : bool, default=True Whether to check that `transform` followed by `inverse_transform` @@ -173,9 +174,18 @@ def _fit_transformer(self, y): elif self.transformer is not None: self.transformer_ = clone(self.transformer) else: - if self.func is not None and self.inverse_func is None: + if (self.func is not None and self.inverse_func is None) or ( + self.func is None and self.inverse_func is not None + ): + lacking_param, existing_param = ( + ("func", "inverse_func") + if self.func is None + else ("inverse_func", "func") + ) raise ValueError( - "When 'func' is provided, 'inverse_func' must also be provided" + f"When '{existing_param}' is provided, '{lacking_param}' must also" + f" be provided. If {lacking_param} is supposed to be the default," + " you need to explicitly pass it the identity function." 
                )
            self.transformer_ = FunctionTransformer(
                func=self.func,
diff --git a/sklearn/compose/tests/test_target.py b/sklearn/compose/tests/test_target.py
index 53242b7e0277b..a971553b64739 100644
--- a/sklearn/compose/tests/test_target.py
+++ b/sklearn/compose/tests/test_target.py
@@ -37,7 +37,8 @@ def test_transform_target_regressor_error():
         match=r"fit\(\) got an unexpected " "keyword argument 'sample_weight'",
     ):
         regr.fit(X, y, sample_weight=sample_weight)
-    # func is given but inverse_func is not
+
+    # one of (func, inverse_func) is given but the other one is not
     regr = TransformedTargetRegressor(func=np.exp)
     with pytest.raises(
         ValueError,
@@ -45,6 +46,13 @@
     ):
         regr.fit(X, y)
 
+    regr = TransformedTargetRegressor(inverse_func=np.log)
+    with pytest.raises(
+        ValueError,
+        match="When 'inverse_func' is provided, 'func' must also be provided",
+    ):
+        regr.fit(X, y)
+
 
 def test_transform_target_regressor_invertible():
     X, y = friedman

From 4e8253703013b38da503e2354d82fb7fa43dd4ec Mon Sep 17 00:00:00 2001
From: Lucy Liu
Date: Sat, 24 Feb 2024 10:16:25 +1100
Subject: [PATCH 02/23] ENH Add `chain_method` to `ClassifierChain` (#27700)

---
 doc/whats_new/v1.5.rst            |   6 ++
 sklearn/multioutput.py            | 164 ++++++++++++++++++++----------
 sklearn/tests/test_multioutput.py |  95 +++++++++++------
 3 files changed, 178 insertions(+), 87 deletions(-)

diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 53f220729f50b..ae46741ae40bd 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -175,6 +175,12 @@ Changelog
 - |Enhancement| :term:`CV splitters ` that ignores the group parameter now
   raises a warning when groups are passed in to :term:`split`. :pr:`28210` by
 
+:mod:`sklearn.multioutput`
+..........................
+
+- |Enhancement| `chain_method` parameter added to :class:`multioutput.ClassifierChain`.
+  :pr:`27700` by :user:`Lucy Liu `.
+
 :mod:`sklearn.pipeline`
 .......................
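To make the stricter validation from PATCH 01/23 concrete, here is a minimal sketch of the new behaviour (assuming a scikit-learn build that includes the patch; `func=np.log` / `inverse_func=np.exp` are an arbitrary invertible pair chosen for illustration):

```python
import numpy as np

from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression

X = np.arange(1, 21, dtype=float).reshape(-1, 1)
y = np.exp(np.linspace(0.1, 1.0, 20))  # strictly positive targets

# Providing only `inverse_func` now raises at fit time, mirroring the
# pre-existing check for a lone `func`.
try:
    TransformedTargetRegressor(
        regressor=LinearRegression(), inverse_func=np.exp
    ).fit(X, y)
except ValueError as exc:
    print(exc)  # "When 'inverse_func' is provided, 'func' must also be provided. ..."

# Providing both functions (here a mutually inverse pair, so the default
# `check_inverse=True` is satisfied) keeps working as before.
reg = TransformedTargetRegressor(
    regressor=LinearRegression(), func=np.log, inverse_func=np.exp
).fit(X, y)
print(reg.predict(X[:3]))
```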
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index c4b4f2bd3dd27..3cfd488297955 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -33,6 +33,7 @@ from .model_selection import cross_val_predict from .utils import Bunch, _print_elapsed_time, check_random_state from .utils._param_validation import HasMethods, StrOptions +from .utils._response import _get_response_values from .utils.metadata_routing import ( MetadataRouter, MethodMapping, @@ -43,7 +44,12 @@ from .utils.metaestimators import available_if from .utils.multiclass import check_classification_targets from .utils.parallel import Parallel, delayed -from .utils.validation import _check_method_params, check_is_fitted, has_fit_parameter +from .utils.validation import ( + _check_method_params, + _check_response_method, + check_is_fitted, + has_fit_parameter, +) __all__ = [ "MultiOutputRegressor", @@ -650,6 +656,41 @@ def _log_message(self, *, estimator_idx, n_estimators, processing_msg): return None return f"({estimator_idx} of {n_estimators}) {processing_msg}" + def _get_predictions(self, X, *, output_method): + """Get predictions for each model in the chain.""" + check_is_fitted(self) + X = self._validate_data(X, accept_sparse=True, reset=False) + Y_output_chain = np.zeros((X.shape[0], len(self.estimators_))) + Y_feature_chain = np.zeros((X.shape[0], len(self.estimators_))) + + # `RegressorChain` does not have a `chain_method_` parameter so we + # default to "predict" + chain_method = getattr(self, "chain_method_", "predict") + hstack = sp.hstack if sp.issparse(X) else np.hstack + for chain_idx, estimator in enumerate(self.estimators_): + previous_predictions = Y_feature_chain[:, :chain_idx] + X_aug = hstack((X, previous_predictions)) + + feature_predictions, _ = _get_response_values( + estimator, + X_aug, + response_method=chain_method, + ) + Y_feature_chain[:, chain_idx] = feature_predictions + + output_predictions, _ = _get_response_values( + estimator, + X_aug, + response_method=output_method, + ) + Y_output_chain[:, chain_idx] = output_predictions + + inv_order = np.empty_like(self.order_) + inv_order[self.order_] = np.arange(len(self.order_)) + Y_output = Y_output_chain[:, inv_order] + + return Y_output + @abstractmethod def fit(self, X, Y, **fit_params): """Fit the model to data matrix X and targets Y. @@ -724,6 +765,16 @@ def fit(self, X, Y, **fit_params): else: routed_params = Bunch(estimator=Bunch(fit=fit_params)) + if hasattr(self, "chain_method"): + chain_method = _check_response_method( + self.base_estimator, + self.chain_method, + ).__name__ + self.chain_method_ = chain_method + else: + # `RegressorChain` does not have a `chain_method` parameter + chain_method = "predict" + for chain_idx, estimator in enumerate(self.estimators_): message = self._log_message( estimator_idx=chain_idx + 1, @@ -741,8 +792,15 @@ def fit(self, X, Y, **fit_params): if self.cv is not None and chain_idx < len(self.estimators_) - 1: col_idx = X.shape[1] + chain_idx cv_result = cross_val_predict( - self.base_estimator, X_aug[:, :col_idx], y=y, cv=self.cv + self.base_estimator, + X_aug[:, :col_idx], + y=y, + cv=self.cv, + method=chain_method, ) + # `predict_proba` output is 2D, we use only output for classes[-1] + if cv_result.ndim > 1: + cv_result = cv_result[:, 1] if sp.issparse(X_aug): X_aug[:, col_idx] = np.expand_dims(cv_result, 1) else: @@ -763,25 +821,7 @@ def predict(self, X): Y_pred : array-like of shape (n_samples, n_classes) The predicted values. 
""" - check_is_fitted(self) - X = self._validate_data(X, accept_sparse=True, reset=False) - Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) - for chain_idx, estimator in enumerate(self.estimators_): - previous_predictions = Y_pred_chain[:, :chain_idx] - if sp.issparse(X): - if chain_idx == 0: - X_aug = X - else: - X_aug = sp.hstack((X, previous_predictions)) - else: - X_aug = np.hstack((X, previous_predictions)) - Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) - - inv_order = np.empty_like(self.order_) - inv_order[self.order_] = np.arange(len(self.order_)) - Y_pred = Y_pred_chain[:, inv_order] - - return Y_pred + return self._get_predictions(X, output_method="predict") class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): @@ -832,6 +872,19 @@ class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. + chain_method : {'predict', 'predict_proba', 'predict_log_proba', \ + 'decision_function'} or list of such str's, default='predict' + + Prediction method to be used by estimators in the chain for + the 'prediction' features of previous estimators in the chain. + + - if `str`, name of the method; + - if a list of `str`, provides the method names in order of + preference. The method used corresponds to the first method in + the list that is implemented by `base_estimator`. + + .. versionadded:: 1.5 + random_state : int, RandomState instance or None, optional (default=None) If ``order='random'``, determines random number generation for the chain order. @@ -858,6 +911,10 @@ class labels for each estimator in the chain. order_ : list The order of labels in the classifier chain. + chain_method_ : str + Prediction method used by estimators in the chain for the prediction + features. + n_features_in_ : int Number of features seen during :term:`fit`. Only defined if the underlying `base_estimator` exposes such an attribute when fit. @@ -905,6 +962,36 @@ class labels for each estimator in the chain. [0.0321..., 0.9935..., 0.0626...]]) """ + _parameter_constraints: dict = { + **_BaseChain._parameter_constraints, + "chain_method": [ + list, + tuple, + StrOptions( + {"predict", "predict_proba", "predict_log_proba", "decision_function"} + ), + ], + } + + def __init__( + self, + base_estimator, + *, + order=None, + cv=None, + chain_method="predict", + random_state=None, + verbose=False, + ): + super().__init__( + base_estimator, + order=order, + cv=cv, + random_state=random_state, + verbose=verbose, + ) + self.chain_method = chain_method + @_fit_context( # ClassifierChain.base_estimator is not validated yet prefer_skip_nested_validation=False @@ -953,22 +1040,7 @@ def predict_proba(self, X): Y_prob : array-like of shape (n_samples, n_classes) The predicted probabilities. 
""" - X = self._validate_data(X, accept_sparse=True, reset=False) - Y_prob_chain = np.zeros((X.shape[0], len(self.estimators_))) - Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) - for chain_idx, estimator in enumerate(self.estimators_): - previous_predictions = Y_pred_chain[:, :chain_idx] - if sp.issparse(X): - X_aug = sp.hstack((X, previous_predictions)) - else: - X_aug = np.hstack((X, previous_predictions)) - Y_prob_chain[:, chain_idx] = estimator.predict_proba(X_aug)[:, 1] - Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) - inv_order = np.empty_like(self.order_) - inv_order[self.order_] = np.arange(len(self.order_)) - Y_prob = Y_prob_chain[:, inv_order] - - return Y_prob + return self._get_predictions(X, output_method="predict_proba") def predict_log_proba(self, X): """Predict logarithm of probability estimates. @@ -1000,23 +1072,7 @@ def decision_function(self, X): Returns the decision function of the sample for each model in the chain. """ - X = self._validate_data(X, accept_sparse=True, reset=False) - Y_decision_chain = np.zeros((X.shape[0], len(self.estimators_))) - Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) - for chain_idx, estimator in enumerate(self.estimators_): - previous_predictions = Y_pred_chain[:, :chain_idx] - if sp.issparse(X): - X_aug = sp.hstack((X, previous_predictions)) - else: - X_aug = np.hstack((X, previous_predictions)) - Y_decision_chain[:, chain_idx] = estimator.decision_function(X_aug) - Y_pred_chain[:, chain_idx] = estimator.predict(X_aug) - - inv_order = np.empty_like(self.order_) - inv_order[self.order_] = np.arange(len(self.order_)) - Y_decision = Y_decision_chain[:, inv_order] - - return Y_decision + return self._get_predictions(X, output_method="decision_function") def get_metadata_routing(self): """Get metadata routing of this object. 
diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index c42938229d5a6..6048c7c500cb8 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -508,11 +508,14 @@ def generate_multilabel_dataset_with_correlations(): return X, Y_multi -def test_classifier_chain_fit_and_predict_with_linear_svc(): +@pytest.mark.parametrize("chain_method", ["predict", "decision_function"]) +def test_classifier_chain_fit_and_predict_with_linear_svc(chain_method): # Fit classifier chain and verify predict performance using LinearSVC X, Y = generate_multilabel_dataset_with_correlations() - classifier_chain = ClassifierChain(LinearSVC(dual="auto")) - classifier_chain.fit(X, Y) + classifier_chain = ClassifierChain( + LinearSVC(dual="auto"), + chain_method=chain_method, + ).fit(X, Y) Y_pred = classifier_chain.predict(X) assert Y_pred.shape == Y.shape @@ -530,12 +533,10 @@ def test_classifier_chain_fit_and_predict_with_sparse_data(csr_container): X, Y = generate_multilabel_dataset_with_correlations() X_sparse = csr_container(X) - classifier_chain = ClassifierChain(LogisticRegression()) - classifier_chain.fit(X_sparse, Y) + classifier_chain = ClassifierChain(LogisticRegression()).fit(X_sparse, Y) Y_pred_sparse = classifier_chain.predict(X_sparse) - classifier_chain = ClassifierChain(LogisticRegression()) - classifier_chain.fit(X, Y) + classifier_chain = ClassifierChain(LogisticRegression()).fit(X, Y) Y_pred_dense = classifier_chain.predict(X) assert_array_equal(Y_pred_sparse, Y_pred_dense) @@ -564,26 +565,41 @@ def test_classifier_chain_vs_independent_models(): ) +@pytest.mark.parametrize( + "chain_method", + ["predict", "predict_proba", "predict_log_proba", "decision_function"], +) @pytest.mark.parametrize("response_method", ["predict_proba", "predict_log_proba"]) -def test_base_chain_fit_and_predict(response_method): - # Fit base chain and verify predict performance +def test_classifier_chain_fit_and_predict(chain_method, response_method): + # Fit classifier chain and verify predict performance X, Y = generate_multilabel_dataset_with_correlations() - chains = [RegressorChain(Ridge()), ClassifierChain(LogisticRegression())] - for chain in chains: - chain.fit(X, Y) - Y_pred = chain.predict(X) - assert Y_pred.shape == Y.shape - assert [c.coef_.size for c in chain.estimators_] == list( - range(X.shape[1], X.shape[1] + Y.shape[1]) - ) + chain = ClassifierChain(LogisticRegression(), chain_method=chain_method) + chain.fit(X, Y) + Y_pred = chain.predict(X) + assert Y_pred.shape == Y.shape + assert [c.coef_.size for c in chain.estimators_] == list( + range(X.shape[1], X.shape[1] + Y.shape[1]) + ) - Y_prob = getattr(chains[1], response_method)(X) + Y_prob = getattr(chain, response_method)(X) if response_method == "predict_log_proba": Y_prob = np.exp(Y_prob) Y_binary = Y_prob >= 0.5 assert_array_equal(Y_binary, Y_pred) - assert isinstance(chains[1], ClassifierMixin) + assert isinstance(chain, ClassifierMixin) + + +def test_regressor_chain_fit_and_predict(): + # Fit regressor chain and verify Y and estimator coefficients shape + X, Y = generate_multilabel_dataset_with_correlations() + chain = RegressorChain(Ridge()) + chain.fit(X, Y) + Y_pred = chain.predict(X) + assert Y_pred.shape == Y.shape + assert [c.coef_.size for c in chain.estimators_] == list( + range(X.shape[1], X.shape[1] + Y.shape[1]) + ) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) @@ -619,24 +635,37 @@ def test_base_chain_random_order(): assert_array_almost_equal(est1.coef_, est2.coef_) 
-def test_base_chain_crossval_fit_and_predict(): +@pytest.mark.parametrize( + "chain_type, chain_method", + [ + ("classifier", "predict"), + ("classifier", "predict_proba"), + ("classifier", "predict_log_proba"), + ("classifier", "decision_function"), + ("regressor", ""), + ], +) +def test_base_chain_crossval_fit_and_predict(chain_type, chain_method): # Fit chain with cross_val_predict and verify predict # performance X, Y = generate_multilabel_dataset_with_correlations() - for chain in [ClassifierChain(LogisticRegression()), RegressorChain(Ridge())]: - chain.fit(X, Y) - chain_cv = clone(chain).set_params(cv=3) - chain_cv.fit(X, Y) - Y_pred_cv = chain_cv.predict(X) - Y_pred = chain.predict(X) - - assert Y_pred_cv.shape == Y_pred.shape - assert not np.all(Y_pred == Y_pred_cv) - if isinstance(chain, ClassifierChain): - assert jaccard_score(Y, Y_pred_cv, average="samples") > 0.4 - else: - assert mean_squared_error(Y, Y_pred_cv) < 0.25 + if chain_type == "classifier": + chain = ClassifierChain(LogisticRegression(), chain_method=chain_method) + else: + chain = RegressorChain(Ridge()) + chain.fit(X, Y) + chain_cv = clone(chain).set_params(cv=3) + chain_cv.fit(X, Y) + Y_pred_cv = chain_cv.predict(X) + Y_pred = chain.predict(X) + + assert Y_pred_cv.shape == Y_pred.shape + assert not np.all(Y_pred == Y_pred_cv) + if isinstance(chain, ClassifierChain): + assert jaccard_score(Y, Y_pred_cv, average="samples") > 0.4 + else: + assert mean_squared_error(Y, Y_pred_cv) < 0.25 @pytest.mark.parametrize( From 347b109437f055956889ec733acc34c05e9185b5 Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Mon, 26 Feb 2024 10:12:21 +0100 Subject: [PATCH 03/23] FIX fix scipy bug with `sp.hstack` in `ClassifierChain` and `RegressorChain` (#28524) --- sklearn/multioutput.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 3cfd488297955..f3b73628a66f5 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -669,6 +669,11 @@ def _get_predictions(self, X, *, output_method): hstack = sp.hstack if sp.issparse(X) else np.hstack for chain_idx, estimator in enumerate(self.estimators_): previous_predictions = Y_feature_chain[:, :chain_idx] + # if `X` is a scipy sparse dok_array, we convert it to a sparse + # coo_array format before hstacking, it's faster; see + # https://github.com/scipy/scipy/issues/20060#issuecomment-1937007039: + if sp.issparse(X) and not sp.isspmatrix(X) and X.format == "dok": + X = sp.coo_array(X) X_aug = hstack((X, previous_predictions)) feature_predictions, _ = _get_response_values( From 845be989654036741b62534c0dd6277ed7e47075 Mon Sep 17 00:00:00 2001 From: Thanh Lam DANG <70220760+lamdang2k@users.noreply.github.com> Date: Mon, 26 Feb 2024 11:16:07 +0100 Subject: [PATCH 04/23] FIX Avoid modifying X in-place when precomputed in OPTICS (#28491) Co-authored-by: Guillaume Lemaitre Co-authored-by: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Co-authored-by: Olivier Grisel --- doc/whats_new/v1.5.rst | 6 ++++++ sklearn/cluster/_optics.py | 1 + sklearn/cluster/tests/test_optics.py | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index ae46741ae40bd..32cd0f770c598 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -67,6 +67,12 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123455 is the *pull request* number, not the issue number. +:mod:`sklearn.cluster` +...................... 
+ +- |FIX| Create copy of precomputed sparse matrix within the `fit` method of + :class:`~cluster.OPTICS` to avoid in-place modification of the sparse matrix. + :pr:`28491` by :user:`Thanh Lam Dang `. :mod:`sklearn.compose` ...................... diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index 493b7f40389cb..230e7ae2129df 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -333,6 +333,7 @@ def fit(self, X, y=None): X = self._validate_data(X, dtype=dtype, accept_sparse="csr") if self.metric == "precomputed" and issparse(X): + X = X.copy() # copy to avoid in-place modification with warnings.catch_warnings(): warnings.simplefilter("ignore", SparseEfficiencyWarning) # Set each diagonal to an explicit value so each point is its diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index d6e415e114ee1..e2140cf0f8b2c 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -816,6 +816,27 @@ def test_precomputed_dists(global_dtype, csr_container): assert_array_equal(clust1.labels_, clust2.labels_) +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_optics_input_not_modified_precomputed_sparse_nodiag(csr_container): + """Check that we don't modify in-place the pre-computed sparse matrix. + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27508 + """ + X = np.random.RandomState(0).rand(6, 6) + # Add zeros on the diagonal that will be implicit when creating + # the sparse matrix. If `X` is modified in-place, the zeros from + # the diagonal will be made explicit. + np.fill_diagonal(X, 0) + X = csr_container(X) + assert all(row != col for row, col in zip(*X.nonzero())) + X_copy = X.copy() + OPTICS(metric="precomputed").fit(X) + # Make sure that we did not modify `X` in-place even by creating + # explicit 0s values. + assert X.nnz == X_copy.nnz + assert_array_equal(X.toarray(), X_copy.toarray()) + + def test_optics_predecessor_correction_ordering(): """Check that cluster correction using predecessor is working as expected. 
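The non-regression test above doubles as a small reproducer; a sketch of the fixed behaviour follows (before PATCH 04/23, fitting made the implicit diagonal zeros of the precomputed matrix explicit, modifying the input in place):

```python
import numpy as np
from scipy.sparse import csr_matrix

from sklearn.cluster import OPTICS

rng = np.random.RandomState(0)
X = rng.rand(6, 6)
np.fill_diagonal(X, 0)   # diagonal zeros stay implicit in the sparse matrix
X = csr_matrix(X)
X_before = X.copy()

OPTICS(metric="precomputed").fit(X)

# With the fix, OPTICS works on its own copy and the input keeps its
# sparsity structure.
assert X.nnz == X_before.nnz
```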
From 13747c429b2c15e0cb8e3d5c952e9b48011ba791 Mon Sep 17 00:00:00 2001 From: scikit-learn-bot Date: Mon, 26 Feb 2024 11:28:10 +0100 Subject: [PATCH 05/23] :lock: :robot: CI Update lock files for cirrus-arm CI build(s) :lock: :robot: (#28532) Co-authored-by: Lock file bot Co-authored-by: Olivier Grisel --- .../cirrus/pymin_conda_forge_linux-aarch64_conda.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock b/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock index 3640acb513242..80055673c22bf 100644 --- a/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock +++ b/build_tools/cirrus/pymin_conda_forge_linux-aarch64_conda.lock @@ -30,7 +30,7 @@ https://conda.anaconda.org/conda-forge/linux-aarch64/xz-5.2.6-h9cdd2b7_0.tar.bz2 https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.1.0-h31becfc_1.conda#8db7cff89510bec0b863a0a8ee6a7bce https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.1.0-h31becfc_1.conda#ad3d3a826b5848d99936e4466ebbaa26 https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran-ng-13.2.0-he9431aa_5.conda#fab7c6a8c84492e18cbe578820e97a56 -https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.42-h194ca79_0.conda#b8ff00cc9a5184726baea61244f8bec3 +https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.43-h194ca79_0.conda#1123e504d9254dd9494267ab9aba95f0 https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.45.1-h194ca79_0.conda#4190198deb1ed253eb938f6a6d92ff4f https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.15-h2a766a3_0.conda#eb3d8c8170e3d03f2564ed2024aa00c8 https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8fc344f_1.conda#105eb1e16bf83bfb2eb380a48032b655 @@ -61,11 +61,11 @@ https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.0-h0d9d63b_3.c https://conda.anaconda.org/conda-forge/noarch/packaging-23.2-pyhd8ed1ab_0.conda#79002079284aa895f883c6b7f3f88fd6 https://conda.anaconda.org/conda-forge/noarch/pluggy-1.4.0-pyhd8ed1ab_0.conda#139e9feb65187e916162917bb2484976 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.0-pyhd8ed1ab_0.conda#6df2be294365eca602cabb4f04a6efe2 +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.1-pyhd8ed1ab_0.conda#576de899521b7d43674ba3ef6eae9142 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.3.0-pyhc1e730c_0.conda#698d2d2b621640bddb9191f132967c9f https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.3.3-py39h7cc1d5f_1.conda#c383c279123694d7a586ec47320d1cb1 +https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.4-py39h7cc1d5f_0.conda#2c06a653ebfa389c18aea2d8f338df3b https://conda.anaconda.org/conda-forge/linux-aarch64/unicodedata2-15.1.0-py39h898b7ef_0.conda#8c072c9329aeea97a46005625267a851 https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda#1cdea58981c5cbc17b51973bcaddcea7 https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a @@ -76,7 +76,7 @@ https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-21_linuxaarc 
https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-21_linuxaarch64_openblas.conda#ab08b651e3630c20d3032e59859f34f7 https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-10.2.0-py39h8ce38d7_0.conda#cf4745fb7f7cb5d0b90c476116c7d8ac https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 -https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.0-pyhd8ed1ab_0.conda#5ba1cc5b924226349d4a49fb547b7579 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.2-pyhd8ed1ab_0.conda#40bd3ef942b9642a3eb20b0bbf92469b https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.1.1-pyhd8ed1ab_0.conda#d04bd1b5bed9177dd7c3cef15e2b6710 https://conda.anaconda.org/conda-forge/linux-aarch64/liblapacke-3.9.0-21_linuxaarch64_openblas.conda#be00a60ef5d88de133a28cb1fb6e0b31 From bb53bb3d7923b778344db63932e33272eff667c9 Mon Sep 17 00:00:00 2001 From: scikit-learn-bot Date: Mon, 26 Feb 2024 11:28:31 +0100 Subject: [PATCH 06/23] :lock: :robot: CI Update lock files for pypy CI build(s) :lock: :robot: (#28533) Co-authored-by: Lock file bot Co-authored-by: Olivier Grisel --- build_tools/azure/pypy3_linux-64_conda.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build_tools/azure/pypy3_linux-64_conda.lock b/build_tools/azure/pypy3_linux-64_conda.lock index df897fa229f4a..71075721a0706 100644 --- a/build_tools/azure/pypy3_linux-64_conda.lock +++ b/build_tools/azure/pypy3_linux-64_conda.lock @@ -32,7 +32,7 @@ https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9 https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.42-h2797004_0.conda#d67729828dc6ff7ba44a61062ad79880 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.1-h2797004_0.conda#fc4ccadfbf6d4784de88c41704792562 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 @@ -74,11 +74,11 @@ https://conda.anaconda.org/conda-forge/linux-64/pillow-10.2.0-py39hcf8a34e_0.con https://conda.anaconda.org/conda-forge/noarch/pluggy-1.4.0-pyhd8ed1ab_0.conda#139e9feb65187e916162917bb2484976 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb https://conda.anaconda.org/conda-forge/noarch/pypy-7.3.15-1_pypy39.conda#a418a6c16bd6f7ed56b92194214791a0 -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.0-pyhd8ed1ab_0.conda#6df2be294365eca602cabb4f04a6efe2 +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.1-pyhd8ed1ab_0.conda#576de899521b7d43674ba3ef6eae9142 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.3.0-pyhc1e730c_0.conda#698d2d2b621640bddb9191f132967c9f 
https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py39hf860d4a_1.conda#ed9f2e116805d111f969b78e71203eef +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hf860d4a_0.conda#e7fded713fb466e1e0670afce1761b47 https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hf860d4a_0.conda#f699157518d28d00c87542b4ec1273be https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-21_linux64_openblas.conda#77cefbfb4d47ba8cafef8e3f768a4538 @@ -86,7 +86,7 @@ https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.0-py39ha90811c_0.c https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.49.0-py39hf860d4a_0.conda#fa0d38d44f69d5c8ca476beb24fb456e https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.1.1-pyhd8ed1ab_0.conda#3d5fa25cf42f3f32a12b2d874ace8574 https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc -https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.0-pyhd8ed1ab_0.conda#5ba1cc5b924226349d4a49fb547b7579 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.2-pyhd8ed1ab_0.conda#40bd3ef942b9642a3eb20b0bbf92469b https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 https://conda.anaconda.org/conda-forge/linux-64/scipy-1.12.0-py39h6dedee3_2.conda#6c5d74bac41838f4377dfd45085e1fec https://conda.anaconda.org/conda-forge/linux-64/blas-2.121-openblas.conda#4a279792fd8861a15705516a52872eb6 From e2f9530f0fbe8e639daf850973f85c10e6edf07b Mon Sep 17 00:00:00 2001 From: Stefanie Senger <91849487+StefanieSenger@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:30:08 +0100 Subject: [PATCH 07/23] ENH Add metadata routing for `RANSACRegressor` (#28261) --- doc/metadata_routing.rst | 2 + doc/modules/linear_model.rst | 4 +- doc/whats_new/v1.5.rst | 12 ++ sklearn/linear_model/_ransac.py | 157 +++++++++++++++--- sklearn/linear_model/tests/test_ransac.py | 8 +- sklearn/tests/metadata_routing_common.py | 24 ++- sklearn/tests/test_metadata_routing.py | 8 +- .../test_metaestimators_metadata_routing.py | 138 ++++++++++++--- sklearn/utils/_metadata_requests.py | 4 +- 9 files changed, 292 insertions(+), 65 deletions(-) diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index 4174f95e65ba0..8768657225976 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -287,6 +287,7 @@ Meta-estimators and functions supporting metadata routing: - :class:`sklearn.linear_model.LogisticRegressionCV` - :class:`sklearn.linear_model.MultiTaskElasticNetCV` - :class:`sklearn.linear_model.MultiTaskLassoCV` +- :class:`sklearn.linear_model.RANSACRegressor` - :class:`sklearn.model_selection.GridSearchCV` - :class:`sklearn.model_selection.HalvingGridSearchCV` - :class:`sklearn.model_selection.HalvingRandomSearchCV` @@ -315,6 +316,7 @@ Meta-estimators and tools not supporting metadata routing yet: - :class:`sklearn.feature_selection.RFE` - :class:`sklearn.feature_selection.RFECV` - :class:`sklearn.feature_selection.SequentialFeatureSelector` +- :class:`sklearn.impute.IterativeImputer` - :class:`sklearn.linear_model.RANSACRegressor` - :class:`sklearn.linear_model.RidgeClassifierCV` - :class:`sklearn.linear_model.RidgeCV` diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 
d05c631865e9d..533ebce99e605 100644
--- a/doc/modules/linear_model.rst
+++ b/doc/modules/linear_model.rst
@@ -1536,10 +1536,10 @@ Each iteration performs the following steps:
 
 1. Select ``min_samples`` random samples from the original data and check
    whether the set of data is valid (see ``is_data_valid``).
-2. Fit a model to the random subset (``base_estimator.fit``) and check
+2. Fit a model to the random subset (``estimator.fit``) and check
    whether the estimated model is valid (see ``is_model_valid``).
 3. Classify all data as inliers or outliers by calculating the residuals
-   to the estimated model (``base_estimator.predict(X) - y``) - all data
+   to the estimated model (``estimator.predict(X) - y``) - all data
    samples with absolute residuals smaller than or equal to the
    ``residual_threshold`` are considered as inliers.
 4. Save fitted model as best model if number of inlier samples is
diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst
index 32cd0f770c598..101481e4070b6 100644
--- a/doc/whats_new/v1.5.rst
+++ b/doc/whats_new/v1.5.rst
@@ -48,6 +48,18 @@ more details.
   via their `fit` methods.
   :pr:`28432` by :user:`Adam Li ` and :user:`Benjamin Bossan `.
 
+Metadata Routing
+----------------
+
+The following models now support metadata routing in one or more of their
+methods. Refer to the :ref:`Metadata Routing User Guide ` for
+more details.
+
+- |Feature| :class:`linear_model.RANSACRegressor` now supports metadata routing
+  in its ``fit``, ``score`` and ``predict`` methods and routes metadata to its
+  underlying estimator's ``fit``, ``score`` and ``predict`` methods.
+  :pr:`28261` by :user:`Stefanie Senger `.
+
 - |Feature| :class:`ensemble.VotingClassifier` and
   :class:`ensemble.VotingRegressor` now support metadata routing and pass
   ``**fit_params`` to the underlying estimators via their `fit` methods.
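Before the implementation below, a rough sketch of how the newly routed metadata can be used (assuming routing is enabled globally; note that `fit` routes to both the sub-estimator's `fit` and `score`, so both requests are set here):

```python
import numpy as np

from sklearn import set_config
from sklearn.linear_model import LinearRegression, RANSACRegressor

set_config(enable_metadata_routing=True)

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 1))
y = 3.0 * X.ravel() + rng.normal(scale=0.1, size=200)
sample_weight = rng.uniform(size=200)

# The sub-estimator has to opt in to receiving `sample_weight`; RANSAC's
# `fit` also calls the sub-estimator's `score` on the inlier subsets.
reg = (
    LinearRegression()
    .set_fit_request(sample_weight=True)
    .set_score_request(sample_weight=True)
)
ransac = RANSACRegressor(estimator=reg, random_state=0)
ransac.fit(X, y, sample_weight=sample_weight)
print(ransac.score(X, y, sample_weight=sample_weight))

set_config(enable_metadata_routing=False)
```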
diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py index b2c25607f91c0..b6bf7b082fc5e 100644 --- a/sklearn/linear_model/_ransac.py +++ b/sklearn/linear_model/_ransac.py @@ -17,6 +17,7 @@ ) from ..exceptions import ConvergenceWarning from ..utils import check_consistent_length, check_random_state +from ..utils._bunch import Bunch from ..utils._param_validation import ( HasMethods, Interval, @@ -25,11 +26,20 @@ StrOptions, ) from ..utils.metadata_routing import ( - _raise_for_unsupported_routing, - _RoutingNotSupportedMixin, + MetadataRouter, + MethodMapping, + _raise_for_params, + _routing_enabled, + process_routing, ) from ..utils.random import sample_without_replacement -from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter +from ..utils.validation import ( + _check_method_params, + _check_sample_weight, + _deprecate_positional_args, + check_is_fitted, + has_fit_parameter, +) from ._base import LinearRegression _EPSILON = np.spacing(1) @@ -70,7 +80,6 @@ def _dynamic_max_trials(n_inliers, n_samples, min_samples, probability): class RANSACRegressor( - _RoutingNotSupportedMixin, MetaEstimatorMixin, RegressorMixin, MultiOutputMixin, @@ -306,7 +315,11 @@ def __init__( # RansacRegressor.estimator is not validated yet prefer_skip_nested_validation=False ) - def fit(self, X, y, sample_weight=None): + # TODO(1.7): remove `sample_weight` from the signature after deprecation + # cycle; for backwards compatibility: pop it from `fit_params` before the + # `_raise_for_params` check and reinsert it after the check + @_deprecate_positional_args(version="1.7") + def fit(self, X, y, *, sample_weight=None, **fit_params): """Fit estimator using RANSAC algorithm. Parameters @@ -324,6 +337,17 @@ def fit(self, X, y, sample_weight=None): .. versionadded:: 0.18 + **fit_params : dict + Parameters routed to the `fit` method of the sub-estimator via the + metadata routing API. + + .. versionadded:: 1.5 + + Only available if + `sklearn.set_config(enable_metadata_routing=True)` is set. See + :ref:`Metadata Routing User Guide ` for more + details. + Returns ------- self : object @@ -336,10 +360,10 @@ def fit(self, X, y, sample_weight=None): `is_data_valid` and `is_model_valid` return False for all `max_trials` randomly chosen sub-samples. """ - _raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight) # Need to validate separately here. We can't pass multi_output=True # because that would allow y to be csr. Delay expensive finiteness # check to the estimator's own input validation. + _raise_for_params(fit_params, self, "fit") check_X_params = dict(accept_sparse="csr", force_all_finite=False) check_y_params = dict(ensure_2d=False) X, y = self._validate_data( @@ -404,12 +428,22 @@ def fit(self, X, y, sample_weight=None): estimator_name = type(estimator).__name__ if sample_weight is not None and not estimator_fit_has_sample_weight: raise ValueError( - "%s does not support sample_weight. Samples" + "%s does not support sample_weight. Sample" " weights are only used for the calibration" " itself." 
                % estimator_name
            )
+
         if sample_weight is not None:
-            sample_weight = _check_sample_weight(sample_weight, X)
+            fit_params["sample_weight"] = sample_weight
+
+        if _routing_enabled():
+            routed_params = process_routing(self, "fit", **fit_params)
+        else:
+            routed_params = Bunch()
+            routed_params.estimator = Bunch(fit={}, predict={}, score={})
+            if sample_weight is not None:
+                sample_weight = _check_sample_weight(sample_weight, X)
+                routed_params.estimator.fit = {"sample_weight": sample_weight}
 
         n_inliers_best = 1
         score_best = -np.inf
@@ -451,13 +485,13 @@ def fit(self, X, y, sample_weight=None):
                 self.n_skips_invalid_data_ += 1
                 continue
 
+            # cut `fit_params` down to `subset_idxs`
+            fit_params_subset = _check_method_params(
+                X, params=routed_params.estimator.fit, indices=subset_idxs
+            )
+
             # fit model for current random sample set
-            if sample_weight is None:
-                estimator.fit(X_subset, y_subset)
-            else:
-                estimator.fit(
-                    X_subset, y_subset, sample_weight=sample_weight[subset_idxs]
-                )
+            estimator.fit(X_subset, y_subset, **fit_params_subset)
 
             # check if estimated model is valid
             if self.is_model_valid is not None and not self.is_model_valid(
@@ -484,8 +518,17 @@ def fit(self, X, y, sample_weight=None):
             X_inlier_subset = X[inlier_idxs_subset]
             y_inlier_subset = y[inlier_idxs_subset]
 
+            # cut `score_params` down to `inlier_idxs_subset`
+            score_params_inlier_subset = _check_method_params(
+                X, params=routed_params.estimator.score, indices=inlier_idxs_subset
+            )
+
             # score of inlier data set
-            score_subset = estimator.score(X_inlier_subset, y_inlier_subset)
+            score_subset = estimator.score(
+                X_inlier_subset,
+                y_inlier_subset,
+                **score_params_inlier_subset,
+            )
 
             # same number of inliers but worse score -> skip current random
             # sample
@@ -549,20 +592,17 @@ def fit(self, X, y, sample_weight=None):
             )
 
         # estimate final model using all inliers
-        if sample_weight is None:
-            estimator.fit(X_inlier_best, y_inlier_best)
-        else:
-            estimator.fit(
-                X_inlier_best,
-                y_inlier_best,
-                sample_weight=sample_weight[inlier_best_idxs_subset],
-            )
+        fit_params_best_idxs_subset = _check_method_params(
+            X, params=routed_params.estimator.fit, indices=inlier_best_idxs_subset
+        )
+
+        estimator.fit(X_inlier_best, y_inlier_best, **fit_params_best_idxs_subset)
 
         self.estimator_ = estimator
         self.inlier_mask_ = inlier_mask_best
         return self
 
-    def predict(self, X):
+    def predict(self, X, **params):
         """Predict using the estimated model.
 
         This is a wrapper for `estimator_.predict(X)`.
@@ -572,6 +612,17 @@ def predict(self, X):
         X : {array-like or sparse matrix} of shape (n_samples, n_features)
             Input data.
 
+        **params : dict
+            Parameters routed to the `predict` method of the sub-estimator via
+            the metadata routing API.
+
+            .. versionadded:: 1.5
+
+                Only available if
+                `sklearn.set_config(enable_metadata_routing=True)` is set. See
+                :ref:`Metadata Routing User Guide ` for more
+                details.
+
         Returns
         -------
         y : array, shape = [n_samples] or [n_samples, n_targets]
@@ -584,9 +635,19 @@ def predict(self, X):
             accept_sparse=True,
             reset=False,
         )
-        return self.estimator_.predict(X)
+
+        _raise_for_params(params, self, "predict")
+
+        if _routing_enabled():
+            predict_params = process_routing(self, "predict", **params).estimator[
+                "predict"
+            ]
+        else:
+            predict_params = {}
+
+        return self.estimator_.predict(X, **predict_params)
 
-    def score(self, X, y):
+    def score(self, X, y, **params):
         """Return the score of the prediction.
 
         This is a wrapper for `estimator_.score(X, y)`.
@@ -599,6 +660,17 @@ def score(self, X, y): y : array-like of shape (n_samples,) or (n_samples, n_targets) Target values. + **params : dict + Parameters routed to the `score` method of the sub-estimator via + the metadata routing API. + + .. versionadded:: 1.5 + + Only available if + `sklearn.set_config(enable_metadata_routing=True)` is set. See + :ref:`Metadata Routing User Guide ` for more + details. + Returns ------- z : float @@ -611,7 +683,38 @@ def score(self, X, y): accept_sparse=True, reset=False, ) - return self.estimator_.score(X, y) + + _raise_for_params(params, self, "score") + if _routing_enabled(): + score_params = process_routing(self, "score", **params).estimator["score"] + else: + score_params = {} + + return self.estimator_.score(X, y, **score_params) + + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.5 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. + """ + router = MetadataRouter(owner=self.__class__.__name__).add( + estimator=self.estimator, + method_mapping=MethodMapping() + .add(caller="fit", callee="fit") + .add(caller="fit", callee="score") + .add(caller="score", callee="score") + .add(caller="predict", callee="predict"), + ) + return router def _more_tags(self): return { diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index b442f6b207e70..7b2bc66160ef3 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -461,7 +461,7 @@ def test_ransac_fit_sample_weight(): ransac_estimator = RANSACRegressor(random_state=0) n_samples = y.shape[0] weights = np.ones(n_samples) - ransac_estimator.fit(X, y, weights) + ransac_estimator.fit(X, y, sample_weight=weights) # sanity check assert ransac_estimator.inlier_mask_.shape[0] == n_samples @@ -498,7 +498,7 @@ def test_ransac_fit_sample_weight(): sample_weight = np.append(sample_weight, outlier_weight) X_ = np.append(X_, outlier_X, axis=0) y_ = np.append(y_, outlier_y) - ransac_estimator.fit(X_, y_, sample_weight) + ransac_estimator.fit(X_, y_, sample_weight=sample_weight) assert_allclose(ransac_estimator.estimator_.coef_, ref_coef_) @@ -509,7 +509,7 @@ def test_ransac_fit_sample_weight(): err_msg = f"{estimator.__class__.__name__} does not support sample_weight." 
with pytest.raises(ValueError, match=err_msg): - ransac_estimator.fit(X, y, weights) + ransac_estimator.fit(X, y, sample_weight=weights) def test_ransac_final_model_fit_sample_weight(): @@ -517,7 +517,7 @@ def test_ransac_final_model_fit_sample_weight(): rng = check_random_state(42) sample_weight = rng.randint(1, 4, size=y.shape[0]) sample_weight = sample_weight / sample_weight.sum() - ransac = RANSACRegressor(estimator=LinearRegression(), random_state=0) + ransac = RANSACRegressor(random_state=0) ransac.fit(X, y, sample_weight=sample_weight) final_model = LinearRegression() diff --git a/sklearn/tests/metadata_routing_common.py b/sklearn/tests/metadata_routing_common.py index dc0387eb38f93..9c9d12b2f1f91 100644 --- a/sklearn/tests/metadata_routing_common.py +++ b/sklearn/tests/metadata_routing_common.py @@ -162,14 +162,17 @@ def fit(self, X, y, sample_weight="default", metadata="default"): ) return self - def predict(self, X, sample_weight="default", metadata="default"): - pass # pragma: no cover + def predict(self, X, y=None, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, "predict", sample_weight=sample_weight, metadata=metadata + ) + return np.zeros(shape=(len(X),)) - # when needed, uncomment the implementation - # record_metadata_not_default( - # self, "predict", sample_weight=sample_weight, metadata=metadata - # ) - # return np.zeros(shape=(len(X),)) + def score(self, X, y, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, "score", sample_weight=sample_weight, metadata=metadata + ) + return 1 class NonConsumingClassifier(ClassifierMixin, BaseEstimator): @@ -278,6 +281,13 @@ def decision_function(self, X, sample_weight="default", metadata="default"): ) return np.zeros(shape=(len(X),)) + # uncomment when needed + # def score(self, X, y, sample_weight="default", metadata="default"): + # record_metadata_not_default( + # self, "score", sample_weight=sample_weight, metadata=metadata + # ) + # return 1 + class ConsumingTransformer(TransformerMixin, BaseEstimator): """A transformer which accepts metadata on fit and transform. 
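The pattern behind these test helpers is worth spelling out: a "consuming" estimator simply records whatever metadata reaches each of its methods, so a test can later assert that the meta-estimator delivered (or withheld) it. A stripped-down, hypothetical version (`RecordingRegressor` and its `received_` attribute are illustrative names, not part of the actual test suite):

```python
import numpy as np

from sklearn.base import BaseEstimator, RegressorMixin


class RecordingRegressor(RegressorMixin, BaseEstimator):
    """Toy consumer: remembers the metadata each method received."""

    def fit(self, X, y, sample_weight=None, metadata=None):
        # Store what arrived so a test can assert on it afterwards.
        self.received_ = {
            "fit": {"sample_weight": sample_weight, "metadata": metadata}
        }
        return self

    def predict(self, X, sample_weight=None, metadata=None):
        self.received_["predict"] = {
            "sample_weight": sample_weight,
            "metadata": metadata,
        }
        return np.zeros(len(X))
```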
diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index cf323d547e4d3..66a9e4fcfd1d2 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -679,7 +679,7 @@ class ConsumingRegressorWarn(ConsumingRegressor): " 'predict'}], 'router': {'fit': {'sample_weight': None, 'metadata':" " None}, 'partial_fit': {'sample_weight': None, 'metadata': None}," " 'predict': {'sample_weight': None, 'metadata': None}, 'score':" - " {'sample_weight': None}}}}" + " {'sample_weight': None, 'metadata': None}}}}" ), ), ], @@ -793,7 +793,8 @@ def test_metadata_routing_add(): == "{'est': {'mapping': [{'callee': 'fit', 'caller': 'fit'}], 'router': {'fit':" " {'sample_weight': 'weights', 'metadata': None}, 'partial_fit':" " {'sample_weight': None, 'metadata': None}, 'predict': {'sample_weight':" - " None, 'metadata': None}, 'score': {'sample_weight': None}}}}" + " None, 'metadata': None}, 'score': {'sample_weight': None, 'metadata':" + " None}}}}" ) # adding one with an instance of MethodMapping @@ -806,7 +807,8 @@ def test_metadata_routing_add(): == "{'est': {'mapping': [{'callee': 'score', 'caller': 'fit'}], 'router':" " {'fit': {'sample_weight': None, 'metadata': None}, 'partial_fit':" " {'sample_weight': None, 'metadata': None}, 'predict': {'sample_weight':" - " None, 'metadata': None}, 'score': {'sample_weight': True}}}}" + " None, 'metadata': None}, 'score': {'sample_weight': True, 'metadata':" + " None}}}}" ) diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 08a7e0ef9952a..bc0d4a649b0b7 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -118,7 +118,7 @@ def enable_slep006(): "X": X, "y": y, "estimator_routing_methods": ["fit"], - "preserves_metadata": False, + "preserves_metadata": "subset", }, { "metaestimator": ClassifierChain, @@ -287,10 +287,21 @@ def enable_slep006(): "cv_name": "cv", "cv_routing_methods": ["fit"], }, + { + "metaestimator": RANSACRegressor, + "estimator_name": "estimator", + "estimator": "regressor", + "init_args": {"min_samples": 0.5}, + "X": X, + "y": y, + "preserves_metadata": "subset", + "estimator_routing_methods": ["fit", "predict", "score"], + "method_mapping": {"fit": ["fit", "score"]}, + }, { "metaestimator": IterativeImputer, "estimator_name": "estimator", - "estimator": ConsumingRegressor, + "estimator": "regressor", "init_args": {"skip_complete": False}, "X": X, "y": y, @@ -299,7 +310,7 @@ def enable_slep006(): { "metaestimator": BaggingClassifier, "estimator_name": "estimator", - "estimator": ConsumingClassifier, + "estimator": "classifier", "X": X, "y": y, "preserves_metadata": False, @@ -308,7 +319,7 @@ def enable_slep006(): { "metaestimator": BaggingRegressor, "estimator_name": "estimator", - "estimator": ConsumingRegressor, + "estimator": "regressor", "X": X, "y": y, "preserves_metadata": False, @@ -344,6 +355,9 @@ def enable_slep006(): to the splitter - method_args: a dict of dicts, defining extra arguments needed to be passed to methods, such as passing `classes` to `partial_fit`. +- method_mapping: a dict of the form `{caller: [callee1, ...]}` which signals + which `.set_{method}_request` methods should be called to set request values. + If not present, a one-to-one mapping is assumed. 
""" # IDs used by pytest to get meaningful verbose messages when running the tests @@ -354,7 +368,6 @@ def enable_slep006(): AdaBoostRegressor(), FeatureUnion([]), GraphicalLassoCV(), - RANSACRegressor(), RFE(ConsumingClassifier()), RFECV(ConsumingClassifier()), RidgeCV(), @@ -407,13 +420,17 @@ def get_init_args(metaestimator_info, sub_estimator_consumes): if sub_estimator_consumes: if sub_estimator_type == "regressor": estimator = ConsumingRegressor(estimator_registry) - else: + elif sub_estimator_type == "classifier": estimator = ConsumingClassifier(estimator_registry) + else: + raise ValueError("Unpermitted `sub_estimator_type`.") # pragma: nocover else: if sub_estimator_type == "regressor": estimator = NonConsumingRegressor() - else: + elif sub_estimator_type == "classifier": estimator = NonConsumingClassifier() + else: + raise ValueError("Unpermitted `sub_estimator_type`.") # pragma: nocover kwargs[estimator_name] = estimator if "scorer_name" in metaestimator_info: scorer_name = metaestimator_info["scorer_name"] @@ -434,6 +451,38 @@ def get_init_args(metaestimator_info, sub_estimator_consumes): ) +def set_requests(estimator, *, method_mapping, methods, metadata_name, value=True): + """Call `set_{method}_request` on a list of methods from the sub-estimator. + + Parameters + ---------- + estimator : BaseEstimator + The estimator for which `set_{method}_request` methods are called. + + method_mapping : dict + The method mapping in the form of `{caller: [callee, ...]}`. + If a "caller" is not present in the method mapping, a one-to-one mapping is + assumed. + + methods : list of str + The list of methods as "caller"s for which the request for the child should + be set. + + metadata_name : str + The name of the metadata to be routed, usually either `"metadata"` or + `"sample_weight"` in our tests. + + value : None, bool, or str + The request value to be set, by default it's `True` + """ + for caller in methods: + for callee in method_mapping.get(caller, [caller]): + set_request_for_method = getattr(estimator, f"set_{callee}_request") + set_request_for_method(**{metadata_name: value}) + if is_classifier(estimator) and callee == "partial_fit": + set_request_for_method(classes=True) + + @pytest.mark.parametrize("estimator", UNSUPPORTED_ESTIMATORS) def test_unsupported_estimators_get_metadata_routing(estimator): """Test that get_metadata_routing is not implemented on meta-estimators for @@ -511,7 +560,32 @@ def test_error_on_missing_requests_for_sub_estimator(metaestimator): ) with pytest.raises(UnsetMetadataPassedError, match=re.escape(msg)): method = getattr(instance, method_name) - method(X, y, **method_kwargs) + if "fit" not in method_name: + # set request on fit + set_requests( + estimator, + method_mapping=metaestimator.get("method_mapping", {}), + methods=["fit"], + metadata_name=key, + ) + instance.fit(X, y, **method_kwargs) + # making sure the requests are unset, in case they were set as a + # side effect of setting them for fit. For instance, if method + # mapping for fit is: `"fit": ["fit", "score"]`, that would mean + # calling `.score` here would not raise, because we have already + # set request value for child estimator's `score`. + set_requests( + estimator, + method_mapping=metaestimator.get("method_mapping", {}), + methods=["fit"], + metadata_name=key, + value=None, + ) + try: + # `fit` and `partial_fit` accept y, others don't. 
+ method(X, y, **method_kwargs) + except TypeError: + method(X, **method_kwargs) @pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS) @@ -523,17 +597,11 @@ def test_setting_request_on_sub_estimator_removes_error(metaestimator): # sub-estimator, e.g. MyMetaEstimator(estimator=MySubEstimator()) return - def set_request(estimator, method_name): - # e.g. call set_fit_request on estimator - set_request_for_method = getattr(estimator, f"set_{method_name}_request") - set_request_for_method(sample_weight=True, metadata=True) - if is_classifier(estimator) and method_name == "partial_fit": - set_request_for_method(classes=True) - cls = metaestimator["metaestimator"] X = metaestimator["X"] y = metaestimator["y"] routing_methods = metaestimator["estimator_routing_methods"] + method_mapping = metaestimator.get("method_mapping", {}) preserves_metadata = metaestimator.get("preserves_metadata", True) for method_name in routing_methods: @@ -545,16 +613,40 @@ def set_request(estimator, method_name): metaestimator, sub_estimator_consumes=True ) if scorer: - set_request(scorer, "score") + set_requests( + scorer, method_mapping={}, methods=["score"], metadata_name=key + ) if cv: cv.set_split_request(groups=True, metadata=True) - set_request(estimator, method_name) + + # `set_{method}_request({metadata}==True)` on the underlying objects + set_requests( + estimator, + method_mapping=method_mapping, + methods=[method_name], + metadata_name=key, + ) + instance = cls(**kwargs) method = getattr(instance, method_name) extra_method_args = metaestimator.get("method_args", {}).get( method_name, {} ) - method(X, y, **method_kwargs, **extra_method_args) + if "fit" not in method_name: + # fit before calling method + set_requests( + estimator, + method_mapping=metaestimator.get("method_mapping", {}), + methods=["fit"], + metadata_name=key, + ) + instance.fit(X, y, **method_kwargs, **extra_method_args) + try: + # `fit` and `partial_fit` accept y, others don't. + method(X, y, **method_kwargs, **extra_method_args) + except TypeError: + method(X, **method_kwargs, **extra_method_args) + # sanity check that registry is not empty, or else the test passes # trivially assert registry @@ -599,8 +691,14 @@ def set_request(estimator, method_name): set_request(estimator, method_name) method = getattr(instance, method_name) extra_method_args = metaestimator.get("method_args", {}).get(method_name, {}) - # This following line should pass w/o raising a routing error. - method(X, y, **extra_method_args) + if "fit" not in method_name: + instance.fit(X, y, **extra_method_args) + # The following should pass w/o raising a routing error. + try: + # `fit` and `partial_fit` accept y, others don't. + method(X, y, **extra_method_args) + except TypeError: + method(X, **extra_method_args) @pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index 83cdf7790c7cd..427e49e65b1da 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1251,8 +1251,8 @@ def func(**kw): if self.validate_keys and (set(kw) - set(self.keys)): raise TypeError( - f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments" - f" are: {set(self.keys)}" + f"Unexpected args: {set(kw) - set(self.keys)} in {self.name}. 
" + f"Accepted arguments are: {set(self.keys)}" ) requests = instance._get_metadata_request() From 74d130714b3c4f78fb1fd271a8af1a4bf8c41cac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Karlo=20Do=C5=A1ilovi=C4=87?= Date: Mon, 26 Feb 2024 15:35:42 +0100 Subject: [PATCH 08/23] ENH Add retry mechanism to fetch_xx functions. (#28160) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Thomas J. Fan Co-authored-by: Loïc Estève --- doc/whats_new/v1.5.rst | 17 +++++++ sklearn/datasets/_base.py | 28 +++++++++++- sklearn/datasets/_california_housing.py | 30 ++++++++++-- sklearn/datasets/_covtype.py | 21 ++++++++- sklearn/datasets/_kddcup99.py | 31 +++++++++++-- sklearn/datasets/_lfw.py | 53 ++++++++++++++++++++-- sklearn/datasets/_olivetti_faces.py | 21 ++++++++- sklearn/datasets/_rcv1.py | 25 ++++++++-- sklearn/datasets/_species_distributions.py | 36 +++++++++++++-- sklearn/datasets/_twenty_newsgroups.py | 47 +++++++++++++++++-- sklearn/datasets/tests/test_base.py | 28 ++++++++++++ 11 files changed, 308 insertions(+), 29 deletions(-) diff --git a/doc/whats_new/v1.5.rst b/doc/whats_new/v1.5.rst index 101481e4070b6..765b17bc3ec06 100644 --- a/doc/whats_new/v1.5.rst +++ b/doc/whats_new/v1.5.rst @@ -96,6 +96,23 @@ Changelog only `inverse_func` is provided without `func` (that would default to identity) being explicitly set as well. :pr:`28483` by :user:`Stefanie Senger `. +:mod:`sklearn.datasets` +....................... + +- |Enhancement| Adds optional arguments `n_retries` and `delay` to functions + :func:`datasets.fetch_20newsgroups`, + :func:`datasets.fetch_20newsgroups_vectorized`, + :func:`datasets.fetch_california_housing`, + :func:`datasets.fetch_covtype`, + :func:`datasets.fetch_kddcup99`, + :func:`datasets.fetch_lfw_pairs`, + :func:`datasets.fetch_lfw_people`, + :func:`datasets.fetch_olivetti_faces`, + :func:`datasets.fetch_rcv1`, + and :func:`datasets.fetch_species_distributions`. + By default, the functions will retry up to 3 times in case of network failures. + :pr:`28160` by :user:`Zhehao Liu ` and :user:`Filip Karlo Došilović `. + :mod:`sklearn.dummy` .................... diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index f925999c030a0..f75d9aaf49f1d 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -11,12 +11,15 @@ import hashlib import os import shutil +import time +import warnings from collections import namedtuple from importlib import resources from numbers import Integral from os import environ, listdir, makedirs from os.path import expanduser, isdir, join, splitext from pathlib import Path +from urllib.error import URLError from urllib.request import urlretrieve import numpy as np @@ -1408,7 +1411,7 @@ def _sha256(path): return sha256hash.hexdigest() -def _fetch_remote(remote, dirname=None): +def _fetch_remote(remote, dirname=None, n_retries=3, delay=1): """Helper function to download a remote dataset into path Fetch a dataset pointed by remote's url, save into path using remote's @@ -1424,6 +1427,16 @@ def _fetch_remote(remote, dirname=None): dirname : str Directory to save the file to. + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : int, default=1 + Number of seconds between retries. + + .. 
versionadded:: 1.5 + Returns ------- file_path: str @@ -1431,7 +1444,18 @@ def _fetch_remote(remote, dirname=None): """ file_path = remote.filename if dirname is None else join(dirname, remote.filename) - urlretrieve(remote.url, file_path) + while True: + try: + urlretrieve(remote.url, file_path) + break + except (URLError, TimeoutError): + if n_retries == 0: + # If no more retries are left, re-raise the caught exception. + raise + warnings.warn(f"Retry downloading from url: {remote.url}") + n_retries -= 1 + time.sleep(delay) + checksum = _sha256(file_path) if remote.checksum != checksum: raise OSError( diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py index a8a889fa8ce1d..e94996ccdec65 100644 --- a/sklearn/datasets/_california_housing.py +++ b/sklearn/datasets/_california_housing.py @@ -23,6 +23,7 @@ import logging import tarfile +from numbers import Integral, Real from os import PathLike, makedirs, remove from os.path import exists @@ -30,7 +31,7 @@ import numpy as np from ..utils import Bunch -from ..utils._param_validation import validate_params +from ..utils._param_validation import Interval, validate_params from . import get_data_home from ._base import ( RemoteFileMetadata, @@ -57,11 +58,19 @@ "download_if_missing": ["boolean"], "return_X_y": ["boolean"], "as_frame": ["boolean"], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0.0, None, closed="neither")], }, prefer_skip_nested_validation=True, ) def fetch_california_housing( - *, data_home=None, download_if_missing=True, return_X_y=False, as_frame=False + *, + data_home=None, + download_if_missing=True, + return_X_y=False, + as_frame=False, + n_retries=3, + delay=1.0, ): """Load the California housing dataset (regression). @@ -97,6 +106,16 @@ def fetch_california_housing( .. versionadded:: 0.23 + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : float, default=1.0 + Number of seconds between retries. + + .. versionadded:: 1.5 + Returns ------- dataset : :class:`~sklearn.utils.Bunch` @@ -154,7 +173,12 @@ def fetch_california_housing( "Downloading Cal. housing from {} to {}".format(ARCHIVE.url, data_home) ) - archive_path = _fetch_remote(ARCHIVE, dirname=data_home) + archive_path = _fetch_remote( + ARCHIVE, + dirname=data_home, + n_retries=n_retries, + delay=delay, + ) with tarfile.open(mode="r:gz", name=archive_path) as f: cal_housing = np.loadtxt( diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py index 4e1b1d7961f2e..1ecbd63ed7341 100644 --- a/sklearn/datasets/_covtype.py +++ b/sklearn/datasets/_covtype.py @@ -17,6 +17,7 @@ import logging import os from gzip import GzipFile +from numbers import Integral, Real from os.path import exists, join from tempfile import TemporaryDirectory @@ -24,7 +25,7 @@ import numpy as np from ..utils import Bunch, check_random_state -from ..utils._param_validation import validate_params +from ..utils._param_validation import Interval, validate_params from . 
import get_data_home
 from ._base import (
     RemoteFileMetadata,
@@ -71,6 +72,8 @@
         "shuffle": ["boolean"],
         "return_X_y": ["boolean"],
         "as_frame": ["boolean"],
+        "n_retries": [Interval(Integral, 1, None, closed="left")],
+        "delay": [Interval(Real, 0.0, None, closed="neither")],
     },
     prefer_skip_nested_validation=True,
 )
@@ -82,6 +85,8 @@ def fetch_covtype(
     shuffle=False,
     return_X_y=False,
     as_frame=False,
+    n_retries=3,
+    delay=1.0,
 ):
     """Load the covertype dataset (classification).
 
@@ -129,6 +134,16 @@ def fetch_covtype(
 
         .. versionadded:: 0.24
 
+    n_retries : int, default=3
+        Number of retries when HTTP errors are encountered.
+
+        .. versionadded:: 1.5
+
+    delay : float, default=1.0
+        Number of seconds between retries.
+
+        .. versionadded:: 1.5
+
     Returns
     -------
     dataset : :class:`~sklearn.utils.Bunch`
@@ -183,7 +198,9 @@ def fetch_covtype(
         # os.rename to atomically move the data files to their target location.
         with TemporaryDirectory(dir=covtype_dir) as temp_dir:
             logger.info(f"Downloading {ARCHIVE.url}")
-            archive_path = _fetch_remote(ARCHIVE, dirname=temp_dir)
+            archive_path = _fetch_remote(
+                ARCHIVE, dirname=temp_dir, n_retries=n_retries, delay=delay
+            )
 
             Xy = np.genfromtxt(GzipFile(filename=archive_path), delimiter=",")
             X = Xy[:, :-1]
diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py
index 444bd01737901..597fb9c9dece3 100644
--- a/sklearn/datasets/_kddcup99.py
+++ b/sklearn/datasets/_kddcup99.py
@@ -12,6 +12,7 @@
 import logging
 import os
 from gzip import GzipFile
+from numbers import Integral, Real
 from os.path import exists, join
 
 import joblib
@@ -19,7 +20,7 @@
 
 from ..utils import Bunch, check_random_state
 from ..utils import shuffle as shuffle_method
-from ..utils._param_validation import StrOptions, validate_params
+from ..utils._param_validation import Interval, StrOptions, validate_params
 from . import get_data_home
 from ._base import (
     RemoteFileMetadata,
@@ -57,6 +58,8 @@
         "download_if_missing": ["boolean"],
         "return_X_y": ["boolean"],
         "as_frame": ["boolean"],
+        "n_retries": [Interval(Integral, 1, None, closed="left")],
+        "delay": [Interval(Real, 0.0, None, closed="neither")],
     },
     prefer_skip_nested_validation=True,
 )
@@ -70,6 +73,8 @@ def fetch_kddcup99(
     download_if_missing=True,
     return_X_y=False,
     as_frame=False,
+    n_retries=3,
+    delay=1.0,
 ):
     """Load the kddcup99 dataset (classification).
 
@@ -127,6 +132,16 @@ def fetch_kddcup99(
 
         .. versionadded:: 0.24
 
+    n_retries : int, default=3
+        Number of retries when HTTP errors are encountered.
+
+        .. versionadded:: 1.5
+
+    delay : float, default=1.0
+        Number of seconds between retries.
+
+        .. versionadded:: 1.5
+
     Returns
     -------
     data : :class:`~sklearn.utils.Bunch`
@@ -160,6 +175,8 @@ def fetch_kddcup99(
         data_home=data_home,
         percent10=percent10,
         download_if_missing=download_if_missing,
+        n_retries=n_retries,
+        delay=delay,
     )
 
     data = kddcup99.data
@@ -243,7 +260,9 @@ def fetch_kddcup99(
     )
 
 
-def _fetch_brute_kddcup99(data_home=None, download_if_missing=True, percent10=True):
+def _fetch_brute_kddcup99(
+    data_home=None, download_if_missing=True, percent10=True, n_retries=3, delay=1.0
+):
     """Load the kddcup99 dataset, downloading it if necessary.
 
     Parameters
@@ -259,6 +278,12 @@ def _fetch_brute_kddcup99(data_home=None, download_if_missing=True, percent10=Tr
     percent10 : bool, default=True
         Whether to load only 10 percent of the data.
 
+    n_retries : int, default=3
+        Number of retries when HTTP errors are encountered.
+
+    delay : float, default=1.0
+        Number of seconds between retries. 
+ Returns ------- dataset : :class:`~sklearn.utils.Bunch` @@ -354,7 +379,7 @@ def _fetch_brute_kddcup99(data_home=None, download_if_missing=True, percent10=Tr elif download_if_missing: _mkdirp(kddcup_dir) logger.info("Downloading %s" % archive.url) - _fetch_remote(archive, dirname=kddcup_dir) + _fetch_remote(archive, dirname=kddcup_dir, n_retries=n_retries, delay=delay) DT = np.dtype(dt) logger.debug("extracting archive") archive_path = join(kddcup_dir, archive.filename) diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index 9c904cfec0016..fb8732fef8300 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -5,6 +5,7 @@ http://vis-www.cs.umass.edu/lfw/ """ + # Copyright (c) 2011 Olivier Grisel # License: BSD 3 clause @@ -73,7 +74,9 @@ # -def _check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): +def _check_fetch_lfw( + data_home=None, funneled=True, download_if_missing=True, n_retries=3, delay=1.0 +): """Helper function to download any missing LFW data""" data_home = get_data_home(data_home=data_home) @@ -87,7 +90,9 @@ def _check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): if not exists(target_filepath): if download_if_missing: logger.info("Downloading LFW metadata: %s", target.url) - _fetch_remote(target, dirname=lfw_home) + _fetch_remote( + target, dirname=lfw_home, n_retries=n_retries, delay=delay + ) else: raise OSError("%s is missing" % target_filepath) @@ -103,7 +108,9 @@ def _check_fetch_lfw(data_home=None, funneled=True, download_if_missing=True): if not exists(archive_path): if download_if_missing: logger.info("Downloading LFW data (~200MB): %s", archive.url) - _fetch_remote(archive, dirname=lfw_home) + _fetch_remote( + archive, dirname=lfw_home, n_retries=n_retries, delay=delay + ) else: raise OSError("%s is missing" % archive_path) @@ -244,6 +251,8 @@ def _fetch_lfw_people( "slice_": [tuple, Hidden(None)], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0.0, None, closed="neither")], }, prefer_skip_nested_validation=True, ) @@ -257,6 +266,8 @@ def fetch_lfw_people( slice_=(slice(70, 195), slice(78, 172)), download_if_missing=True, return_X_y=False, + n_retries=3, + delay=1.0, ): """Load the Labeled Faces in the Wild (LFW) people dataset \ (classification). @@ -310,6 +321,16 @@ def fetch_lfw_people( .. versionadded:: 0.20 + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : float, default=1.0 + Number of seconds between retries. + + .. versionadded:: 1.5 + Returns ------- dataset : :class:`~sklearn.utils.Bunch` @@ -342,7 +363,11 @@ def fetch_lfw_people( .. 
versionadded:: 0.20 """ lfw_home, data_folder_path = _check_fetch_lfw( - data_home=data_home, funneled=funneled, download_if_missing=download_if_missing + data_home=data_home, + funneled=funneled, + download_if_missing=download_if_missing, + n_retries=n_retries, + delay=delay, ) logger.debug("Loading LFW people faces from %s", lfw_home) @@ -439,6 +464,8 @@ def _fetch_lfw_pairs( "color": ["boolean"], "slice_": [tuple, Hidden(None)], "download_if_missing": ["boolean"], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0.0, None, closed="neither")], }, prefer_skip_nested_validation=True, ) @@ -451,6 +478,8 @@ def fetch_lfw_pairs( color=False, slice_=(slice(70, 195), slice(78, 172)), download_if_missing=True, + n_retries=3, + delay=1.0, ): """Load the Labeled Faces in the Wild (LFW) pairs dataset (classification). @@ -507,6 +536,16 @@ def fetch_lfw_pairs( If False, raise an OSError if the data is not locally available instead of trying to download the data from the source site. + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : float, default=1.0 + Number of seconds between retries. + + .. versionadded:: 1.5 + Returns ------- data : :class:`~sklearn.utils.Bunch` @@ -533,7 +572,11 @@ def fetch_lfw_pairs( Description of the Labeled Faces in the Wild (LFW) dataset. """ lfw_home, data_folder_path = _check_fetch_lfw( - data_home=data_home, funneled=funneled, download_if_missing=download_if_missing + data_home=data_home, + funneled=funneled, + download_if_missing=download_if_missing, + n_retries=n_retries, + delay=delay, ) logger.debug("Loading %s LFW pairs from %s", subset, lfw_home) diff --git a/sklearn/datasets/_olivetti_faces.py b/sklearn/datasets/_olivetti_faces.py index 8e1b3c91e254b..b90eaf42a247b 100644 --- a/sklearn/datasets/_olivetti_faces.py +++ b/sklearn/datasets/_olivetti_faces.py @@ -13,6 +13,7 @@ # Copyright (c) 2011 David Warde-Farley # License: BSD 3 clause +from numbers import Integral, Real from os import PathLike, makedirs, remove from os.path import exists @@ -21,7 +22,7 @@ from scipy.io import loadmat from ..utils import Bunch, check_random_state -from ..utils._param_validation import validate_params +from ..utils._param_validation import Interval, validate_params from . import get_data_home from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr @@ -41,6 +42,8 @@ "random_state": ["random_state"], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0.0, None, closed="neither")], }, prefer_skip_nested_validation=True, ) @@ -51,6 +54,8 @@ def fetch_olivetti_faces( random_state=0, download_if_missing=True, return_X_y=False, + n_retries=3, + delay=1.0, ): """Load the Olivetti faces data-set from AT&T (classification). @@ -90,6 +95,16 @@ def fetch_olivetti_faces( .. versionadded:: 0.22 + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : float, default=1.0 + Number of seconds between retries. + + .. 
versionadded:: 1.5 + Returns ------- data : :class:`~sklearn.utils.Bunch` @@ -122,7 +137,9 @@ def fetch_olivetti_faces( raise OSError("Data not found and `download_if_missing` is False") print("downloading Olivetti faces from %s to %s" % (FACES.url, data_home)) - mat_path = _fetch_remote(FACES, dirname=data_home) + mat_path = _fetch_remote( + FACES, dirname=data_home, n_retries=n_retries, delay=delay + ) mfile = loadmat(file_name=mat_path) # delete raw .mat data remove(mat_path) diff --git a/sklearn/datasets/_rcv1.py b/sklearn/datasets/_rcv1.py index d9f392d872216..6d4b2172343fb 100644 --- a/sklearn/datasets/_rcv1.py +++ b/sklearn/datasets/_rcv1.py @@ -10,6 +10,7 @@ import logging from gzip import GzipFile +from numbers import Integral, Real from os import PathLike, makedirs, remove from os.path import exists, join @@ -19,7 +20,7 @@ from ..utils import Bunch from ..utils import shuffle as shuffle_ -from ..utils._param_validation import StrOptions, validate_params +from ..utils._param_validation import Interval, StrOptions, validate_params from . import get_data_home from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr from ._svmlight_format_io import load_svmlight_files @@ -80,6 +81,8 @@ "random_state": ["random_state"], "shuffle": ["boolean"], "return_X_y": ["boolean"], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0.0, None, closed="neither")], }, prefer_skip_nested_validation=True, ) @@ -91,6 +94,8 @@ def fetch_rcv1( random_state=None, shuffle=False, return_X_y=False, + n_retries=3, + delay=1.0, ): """Load the RCV1 multilabel dataset (classification). @@ -140,6 +145,16 @@ def fetch_rcv1( .. versionadded:: 0.20 + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : float, default=1.0 + Number of seconds between retries. + + .. versionadded:: 1.5 + Returns ------- dataset : :class:`~sklearn.utils.Bunch` @@ -185,7 +200,9 @@ def fetch_rcv1( files = [] for each in XY_METADATA: logger.info("Downloading %s" % each.url) - file_path = _fetch_remote(each, dirname=rcv1_dir) + file_path = _fetch_remote( + each, dirname=rcv1_dir, n_retries=n_retries, delay=delay + ) files.append(GzipFile(filename=file_path)) Xy = load_svmlight_files(files, n_features=N_FEATURES) @@ -211,7 +228,9 @@ def fetch_rcv1( not exists(sample_topics_path) or not exists(topics_path) ): logger.info("Downloading %s" % TOPICS_METADATA.url) - topics_archive_path = _fetch_remote(TOPICS_METADATA, dirname=rcv1_dir) + topics_archive_path = _fetch_remote( + TOPICS_METADATA, dirname=rcv1_dir, n_retries=n_retries, delay=delay + ) # parse the target file n_cat = -1 diff --git a/sklearn/datasets/_species_distributions.py b/sklearn/datasets/_species_distributions.py index 7979604afab0e..2bd6f0207b069 100644 --- a/sklearn/datasets/_species_distributions.py +++ b/sklearn/datasets/_species_distributions.py @@ -39,6 +39,7 @@ import logging from io import BytesIO +from numbers import Integral, Real from os import PathLike, makedirs, remove from os.path import exists @@ -46,7 +47,7 @@ import numpy as np from ..utils import Bunch -from ..utils._param_validation import validate_params +from ..utils._param_validation import Interval, validate_params from . 
import get_data_home from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath @@ -136,10 +137,21 @@ def construct_grids(batch): @validate_params( - {"data_home": [str, PathLike, None], "download_if_missing": ["boolean"]}, + { + "data_home": [str, PathLike, None], + "download_if_missing": ["boolean"], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0.0, None, closed="neither")], + }, prefer_skip_nested_validation=True, ) -def fetch_species_distributions(*, data_home=None, download_if_missing=True): +def fetch_species_distributions( + *, + data_home=None, + download_if_missing=True, + n_retries=3, + delay=1.0, +): """Loader for species distribution dataset from Phillips et. al. (2006). Read more in the :ref:`User Guide `. @@ -154,6 +166,16 @@ def fetch_species_distributions(*, data_home=None, download_if_missing=True): If False, raise an OSError if the data is not locally available instead of trying to download the data from the source site. + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : float, default=1.0 + Number of seconds between retries. + + .. versionadded:: 1.5 + Returns ------- data : :class:`~sklearn.utils.Bunch` @@ -242,7 +264,9 @@ def fetch_species_distributions(*, data_home=None, download_if_missing=True): if not download_if_missing: raise OSError("Data not found and `download_if_missing` is False") logger.info("Downloading species data from %s to %s" % (SAMPLES.url, data_home)) - samples_path = _fetch_remote(SAMPLES, dirname=data_home) + samples_path = _fetch_remote( + SAMPLES, dirname=data_home, n_retries=n_retries, delay=delay + ) with np.load(samples_path) as X: # samples.zip is a valid npz for f in X.files: fhandle = BytesIO(X[f]) @@ -255,7 +279,9 @@ def fetch_species_distributions(*, data_home=None, download_if_missing=True): logger.info( "Downloading coverage data from %s to %s" % (COVERAGES.url, data_home) ) - coverages_path = _fetch_remote(COVERAGES, dirname=data_home) + coverages_path = _fetch_remote( + COVERAGES, dirname=data_home, n_retries=n_retries, delay=delay + ) with np.load(coverages_path) as X: # coverages.zip is a valid npz coverages = [] for f in X.files: diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py index 862f533548857..b5476f5622cff 100644 --- a/sklearn/datasets/_twenty_newsgroups.py +++ b/sklearn/datasets/_twenty_newsgroups.py @@ -21,6 +21,7 @@ test sets. The compressed dataset size is around 14 Mb compressed. Once uncompressed the train set is 52 MB and the test set is 34 MB. """ + # Copyright (c) 2011 Olivier Grisel # License: BSD 3 clause @@ -32,6 +33,7 @@ import shutil import tarfile from contextlib import suppress +from numbers import Integral, Real import joblib import numpy as np @@ -40,7 +42,7 @@ from .. import preprocessing from ..feature_extraction.text import CountVectorizer from ..utils import Bunch, check_random_state -from ..utils._param_validation import StrOptions, validate_params +from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils.fixes import tarfile_extractall from . 
import get_data_home, load_files from ._base import ( @@ -66,7 +68,7 @@ TEST_FOLDER = "20news-bydate-test" -def _download_20newsgroups(target_dir, cache_path): +def _download_20newsgroups(target_dir, cache_path, n_retries, delay): """Download the 20 newsgroups data and stored it as a zipped pickle.""" train_path = os.path.join(target_dir, TRAIN_FOLDER) test_path = os.path.join(target_dir, TEST_FOLDER) @@ -74,7 +76,9 @@ def _download_20newsgroups(target_dir, cache_path): os.makedirs(target_dir, exist_ok=True) logger.info("Downloading dataset from %s (14 MB)", ARCHIVE.url) - archive_path = _fetch_remote(ARCHIVE, dirname=target_dir) + archive_path = _fetch_remote( + ARCHIVE, dirname=target_dir, n_retries=n_retries, delay=delay + ) logger.debug("Decompressing %s", archive_path) with tarfile.open(archive_path, "r:gz") as fp: @@ -165,6 +169,8 @@ def strip_newsgroup_footer(text): "remove": [tuple], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0.0, None, closed="neither")], }, prefer_skip_nested_validation=True, ) @@ -178,6 +184,8 @@ def fetch_20newsgroups( remove=(), download_if_missing=True, return_X_y=False, + n_retries=3, + delay=1.0, ): """Load the filenames and data from the 20 newsgroups dataset \ (classification). @@ -241,6 +249,16 @@ def fetch_20newsgroups( .. versionadded:: 0.22 + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : float, default=1.0 + Number of seconds between retries. + + .. versionadded:: 1.5 + Returns ------- bunch : :class:`~sklearn.utils.Bunch` @@ -286,7 +304,10 @@ def fetch_20newsgroups( if download_if_missing: logger.info("Downloading 20news dataset. This may take a few minutes.") cache = _download_20newsgroups( - target_dir=twenty_home, cache_path=cache_path + target_dir=twenty_home, + cache_path=cache_path, + n_retries=n_retries, + delay=delay, ) else: raise OSError("20Newsgroups dataset not found") @@ -360,6 +381,8 @@ def fetch_20newsgroups( "return_X_y": ["boolean"], "normalize": ["boolean"], "as_frame": ["boolean"], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0.0, None, closed="neither")], }, prefer_skip_nested_validation=True, ) @@ -372,6 +395,8 @@ def fetch_20newsgroups_vectorized( return_X_y=False, normalize=True, as_frame=False, + n_retries=3, + delay=1.0, ): """Load and vectorize the 20 newsgroups dataset (classification). @@ -443,6 +468,16 @@ def fetch_20newsgroups_vectorized( .. versionadded:: 0.24 + n_retries : int, default=3 + Number of retries when HTTP errors are encountered. + + .. versionadded:: 1.5 + + delay : float, default=1.0 + Number of seconds between retries. + + .. 
versionadded:: 1.5 + Returns ------- bunch : :class:`~sklearn.utils.Bunch` @@ -485,6 +520,8 @@ def fetch_20newsgroups_vectorized( random_state=12, remove=remove, download_if_missing=download_if_missing, + n_retries=n_retries, + delay=delay, ) data_test = fetch_20newsgroups( @@ -495,6 +532,8 @@ def fetch_20newsgroups_vectorized( random_state=12, remove=remove, download_if_missing=download_if_missing, + n_retries=n_retries, + delay=delay, ) if os.path.exists(target_file): diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 0a1190060a055..b79f8c47c55c5 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -1,3 +1,4 @@ +import io import os import shutil import tempfile @@ -6,6 +7,8 @@ from importlib import resources from pathlib import Path from pickle import dumps, loads +from unittest.mock import Mock +from urllib.error import HTTPError import numpy as np import pytest @@ -24,6 +27,8 @@ load_wine, ) from sklearn.datasets._base import ( + RemoteFileMetadata, + _fetch_remote, load_csv_data, load_gzip_compressed_csv_data, ) @@ -363,3 +368,26 @@ def test_load_boston_error(): msg = "cannot import name 'non_existing_function' from 'sklearn.datasets'" with pytest.raises(ImportError, match=msg): from sklearn.datasets import non_existing_function # noqa + + +def test_fetch_remote_raise_warnings_with_invalid_url(monkeypatch): + """Check retry mechanism in _fetch_remote.""" + + url = "https://scikit-learn.org/this_file_does_not_exist.tar.gz" + invalid_remote_file = RemoteFileMetadata("invalid_file", url, None) + urlretrieve_mock = Mock( + side_effect=HTTPError( + url=url, code=404, msg="Not Found", hdrs=None, fp=io.BytesIO() + ) + ) + monkeypatch.setattr("sklearn.datasets._base.urlretrieve", urlretrieve_mock) + + with pytest.warns(UserWarning, match="Retry downloading") as record: + with pytest.raises(HTTPError, match="HTTP Error 404"): + _fetch_remote(invalid_remote_file, n_retries=3, delay=0) + + assert urlretrieve_mock.call_count == 4 + + for r in record: + assert str(r.message) == f"Retry downloading from url: {url}" + assert len(record) == 3 From 3ce527dd28555618e8937fecbddc9517f85a9bcb Mon Sep 17 00:00:00 2001 From: scikit-learn-bot Date: Mon, 26 Feb 2024 16:53:54 +0100 Subject: [PATCH 09/23] :lock: :robot: CI Update lock files for main CI build(s) :lock: :robot: (#28461) Co-authored-by: Lock file bot Co-authored-by: Olivier Grisel --- build_tools/azure/debian_atlas_32bit_lock.txt | 2 +- ...latest_conda_forge_mkl_linux-64_conda.lock | 42 ++++++------ ...pylatest_conda_forge_mkl_osx-64_conda.lock | 24 +++---- ...test_conda_mkl_no_openmp_osx-64_conda.lock | 8 +-- ...st_pip_openblas_pandas_linux-64_conda.lock | 20 +++--- ...onda_defaults_openblas_linux-64_conda.lock | 22 ++++--- .../pymin_conda_forge_mkl_win-64_conda.lock | 26 ++++---- ...e_openblas_ubuntu_2204_linux-64_conda.lock | 34 +++++----- build_tools/azure/ubuntu_atlas_lock.txt | 2 +- build_tools/circle/doc_linux-64_conda.lock | 66 +++++++++---------- .../doc_min_dependencies_linux-64_conda.lock | 32 ++++----- 11 files changed, 141 insertions(+), 137 deletions(-) diff --git a/build_tools/azure/debian_atlas_32bit_lock.txt b/build_tools/azure/debian_atlas_32bit_lock.txt index 6f2cac31e4eb9..5393b0ddc1e60 100644 --- a/build_tools/azure/debian_atlas_32bit_lock.txt +++ b/build_tools/azure/debian_atlas_32bit_lock.txt @@ -6,7 +6,7 @@ # attrs==23.2.0 # via pytest -coverage==7.4.1 +coverage==7.4.3 # via pytest-cov cython==3.0.8 # via -r 
build_tools/azure/debian_atlas_32bit_requirements.txt diff --git a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock index 40f4373b62d20..fdf698cc9f084 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock @@ -20,7 +20,7 @@ https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.10-hd590300_0.conda https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.9.0-hd590300_0.conda#71b89db63b5b504e7afc8ad901172e1e https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hd590300_5.conda#69b8b6202a07720f448be700e300ccf4 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.26.0-hd590300_0.conda#a86d90025198fd411845fc245ebc06c8 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.27.0-hd590300_0.conda#f6afff0e9ee08d2f1b897881a4f38cdb https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-he1b5a44_1004.tar.bz2#cddaf2c63ea4a5901cf09524c490ecdc https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220 @@ -81,9 +81,9 @@ https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2. https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.48-h71f35ed_0.conda#4d18d86916705d352d5f4adfb7f0edd3 https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.42-h2797004_0.conda#d67729828dc6ff7ba44a61062ad79880 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-3.21.12-hfc55251_2.conda#e3a7d4ba09b8dc939b98fef55f539220 https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.1-h2797004_0.conda#fc4ccadfbf6d4784de88c41704792562 https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe @@ -104,7 +104,7 @@ https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_9.cond https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.3-h783c2da_0.conda#9bd06b12bbfa6fd1740fd23af4b0f0c7 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.4-h783c2da_0.conda#d86baf8740d1a906b9716f2a0bac2f2d https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.54.3-hb20ce57_0.conda#7af7c59ab24db007dfd82e0a3a343f66 
https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.9.3-default_h554bfaf_1009.conda#f36ddc11ca46958197a45effdd286e45 @@ -114,9 +114,9 @@ https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.18.1-h8fd135c_2.cond https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-ha9c0a0a_2.conda#55ed21669b2015f77c180feb1dd41930 https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-17.0.6-h4dfa4b3_0.conda#c1665f9c1c9f6c93d8b4e492a6a39056 https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_6.conda#e87530d1b12dd7f4e0f856dc07358d60 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.97-h1d7d5a4_0.conda#b916d71a3032416e3f9136090d814472 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.98-h1d7d5a4_0.conda#54b56c2fdf973656b748e0378900ec13 https://conda.anaconda.org/conda-forge/linux-64/orc-1.9.0-h2f23424_1.conda#9571eb3eb0f7fe8b59956a7786babbcd -https://conda.anaconda.org/conda-forge/linux-64/python-3.11.7-hab00c5b_1_cpython.conda#27cf681282c11dba7b0b1fd266e8f289 +https://conda.anaconda.org/conda-forge/linux-64/python-3.11.8-hab00c5b_0_cpython.conda#2fdc314ee058eda0114738a9309d3683 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 @@ -135,7 +135,7 @@ https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#e https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.3-hfc55251_0.conda#41d2f46e0ac8372eeb959860713d9b21 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.4-hfc55251_0.conda#d184ba1bf15a2bbb3be6118c90fd487d https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py311h9547e67_1.conda#2c65bdf442b0d37aad080c8a4e0d452f https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 @@ -150,16 +150,16 @@ https://conda.anaconda.org/conda-forge/noarch/packaging-23.2-pyhd8ed1ab_0.conda# https://conda.anaconda.org/conda-forge/noarch/pluggy-1.4.0-pyhd8ed1ab_0.conda#139e9feb65187e916162917bb2484976 https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.4-pyhd8ed1ab_0.conda#c79cacf8a06a51552fc651652f170208 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.0.3-pyhd8ed1ab_0.conda#40695fdfd15a92121ed2922900d0308b 
+https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.1-pyhd8ed1ab_0.conda#576de899521b7d43674ba3ef6eae9142 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.11.0-h00ab1b0_1.conda#4531d2927578e7e254ff3bcf6457518c -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.3.0-pyhc1e730c_0.conda#698d2d2b621640bddb9191f132967c9f https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py311h459d7ec_1.conda#a700fcb5cedd3e72d0c75d095c7a6eda -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.9.0-pyha770c72_0.conda#a92a6440c3fe7052d63244f3aba2a4a7 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py311h459d7ec_0.conda#cc7727006191b8f3630936b339a76cd0 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.10.0-pyha770c72_0.conda#16ae769069b380646c47142d719ef466 https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda#1cdea58981c5cbc17b51973bcaddcea7 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e @@ -168,14 +168,14 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_ https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.7.3-h28f7589_1.conda#97503d3e565004697f1651753aa95b9e https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.9.3-hb447be9_1.conda#c520669eb0be9269a5f0d8ef62531882 https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e -https://conda.anaconda.org/conda-forge/linux-64/coverage-7.4.1-py311h459d7ec_0.conda#9caf3270065a2d40fd9a443ba1568e96 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.48.1-py311h459d7ec_0.conda#36363685b6e56682b1b256eb0ad503f6 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.3-hfc55251_0.conda#e08e51acc7d1ae8dbe13255e7b4c64ac +https://conda.anaconda.org/conda-forge/linux-64/coverage-7.4.3-py311h459d7ec_1.conda#4fb7f674bf6839da62317a7c6e725c55 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.49.0-py311h459d7ec_0.conda#d66c9e36ab104f94e35b015c86c2fcb4 +https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.4-hfc55251_0.conda#f36a7b2420c3fc3c48a3d609841d8fee https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_hb11cfb5_4.conda#c90f4cbb57839c98fef8f830e4b9972f https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.12.0-hac9eb74_1.conda#0dee716254497604762957076ac76540 https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.6.0-hd429924_1.conda#1dbcc04604fdf1e526e6d1b0b6938396 -https://conda.anaconda.org/conda-forge/noarch/meson-1.3.1-pyhd8ed1ab_0.conda#54744574be599bff37ee4c3624ed02d2 +https://conda.anaconda.org/conda-forge/noarch/meson-1.3.2-pyhd8ed1ab_0.conda#8d18c47cc233a35c81450ba1ce601eb4 
https://conda.anaconda.org/conda-forge/linux-64/mkl-2022.2.1-h84fe81f_16997.conda#a7ce56d5757f5b57e7daabe703ade5bb https://conda.anaconda.org/conda-forge/linux-64/pillow-10.2.0-py311ha6c5da5_0.conda#a5ccd7f2271f28b7d2de0b02b64e3796 https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 @@ -199,16 +199,16 @@ https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_mkl.ta https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_mkl.tar.bz2#a2f166748917d6d6e4707841ca1f519e https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.10.57-h85b1a90_19.conda#0605d3d60857fc07bd6a11e878fe0f08 https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.4-py311h64a7726_0.conda#a502d7aad449a1206efb366d6a12c52d -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h450f30e_18.conda#ef0430f8df5dcdedcaaab340b228f30c +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h5810be5_19.conda#54866f708d43002a514d0b9b0f84bc11 https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.0-py311h9547e67_0.conda#40828c5b36ef52433e21f89943e09f33 https://conda.anaconda.org/conda-forge/linux-64/libarrow-12.0.1-hb87d912_8_cpu.conda#3f3b11398fe79b578e3c44dd00a44e4a -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.0-py311h320fe9a_0.conda#b9e7a2cb2c47bbb99c05d1892500be45 -https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.7-py311h2bb2bab_0.conda#cef7c3e28a7f01c4c97749e48391e809 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.1-py311h320fe9a_0.conda#aac8d7137fedc2fd5f8320bf50e4204c +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.10-py311h2bb2bab_0.conda#445ae91124b41ba5496989f1cb657b2b https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py311hf0fb5b6_5.conda#ec7e45bc76d9d0b69a74a2075932b8e8 https://conda.anaconda.org/conda-forge/linux-64/pytorch-1.13.1-cpu_py311h410fd25_1.conda#ddd2fadddf89e3dc3d541a2537fce010 https://conda.anaconda.org/conda-forge/linux-64/scipy-1.12.0-py311h64a7726_2.conda#24ca5107ab75c5521067b8ba505dfae5 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.2-py311h54ef318_0.conda#9f80753bc008bfc9b95f39d9ff9f1694 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.3-py311h54ef318_0.conda#014c115be880802d2372ac6ed665f526 https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.1-py311h92ebd52_1.conda#586ea5aa4a4ce2e7dbecb6c7416fc8ac https://conda.anaconda.org/conda-forge/linux-64/pyarrow-12.0.1-py311h39c9aba_8_cpu.conda#587370a25bb2c50cce90909ce20d38b8 https://conda.anaconda.org/conda-forge/linux-64/pytorch-cpu-1.13.1-cpu_py311hdb170b5_1.conda#a805d5f103e493f207613283d8acbbe1 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.2-py311h38be061_0.conda#ecffdcca48fcf288c2d9554e749be7ec +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.3-py311h38be061_0.conda#0452c2cca94bdda38a16cf7b84edcd27 diff --git a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock index da7e7fd243935..15f22dbac951e 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock @@ -29,7 +29,7 @@ https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hb486fe8_0.tar.bz2#f9d6 https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.1.0-h0dc2134_1.conda#9ee0bab91b2ca579e10353738be36063 
https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.1.0-h0dc2134_1.conda#8a421fe09c6187f0eb5e2338a8a8be6d https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-13.2.0-h2873a65_3.conda#e4fb4d23ec2870ff3c40d10afe305aec -https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.42-h92b6c6a_0.conda#7654da21e9d7ca6a8c87fbc77448588e +https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.43-h92b6c6a_0.conda#65dcddb15965c9de2c0365cb14910532 https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.45.1-h92b6c6a_0.conda#e451d14a5412cdc68be50493df251f55 https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.15-hb7f2c08_0.conda#5513f57e0238c87c12dffedbcc9c1a4a https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.12.5-hc0ae0f7_0.conda#abe27e7ab68b95e8d0e41cd5018ec8ae @@ -56,7 +56,7 @@ https://conda.anaconda.org/conda-forge/osx-64/libhiredis-1.0.2-h2beb688_0.tar.bz https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-16.0.6-hbedff68_3.conda#e9356b0807462e8f84c1384a8da539a5 https://conda.anaconda.org/conda-forge/osx-64/mpc-1.3.1-h81bd1dd_0.conda#c752c0eb6c250919559172c011e5f65b https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.0-ha4da562_3.conda#40a36f8e9a6fdf6a78c6428ee6c44188 -https://conda.anaconda.org/conda-forge/osx-64/python-3.12.1-h9f0c242_1_cpython.conda#41d5549764b9f37199e6255e5e9daee6 +https://conda.anaconda.org/conda-forge/osx-64/python-3.12.2-h9f0c242_0_cpython.conda#0179b8007ba008cf5bec11f3b3853902 https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.11.0-h7728843_1.conda#29e29beba9deb0ef66bee015c5bf3c14 https://conda.anaconda.org/conda-forge/osx-64/ccache-4.9.1-h41adc32_0.conda#45aaf96b67840bd98a928de8679098fa https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-973.0.1-ha1c5b94_16.conda#00eb71204323fa6449b38dd34ab9c65d @@ -77,22 +77,22 @@ https://conda.anaconda.org/conda-forge/noarch/packaging-23.2-pyhd8ed1ab_0.conda# https://conda.anaconda.org/conda-forge/osx-64/pillow-10.2.0-py312h0c70c2f_0.conda#0cc3674239ad12c6836cb4174f106c92 https://conda.anaconda.org/conda-forge/noarch/pluggy-1.4.0-pyhd8ed1ab_0.conda#139e9feb65187e916162917bb2484976 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.4-pyhd8ed1ab_0.conda#c79cacf8a06a51552fc651652f170208 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.0.3-pyhd8ed1ab_0.conda#40695fdfd15a92121ed2922900d0308b +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.1-pyhd8ed1ab_0.conda#576de899521b7d43674ba3ef6eae9142 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.3.0-pyhc1e730c_0.conda#698d2d2b621640bddb9191f132967c9f https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/osx-64/tornado-6.3.3-py312h104f124_1.conda#6835d4940d6fbd41e1a32d58dfae8f06 
+https://conda.anaconda.org/conda-forge/osx-64/tornado-6.4-py312h41838bb_0.conda#2d2d1fde5800d45cb56218583156d23d https://conda.anaconda.org/conda-forge/osx-64/cctools-973.0.1-h40f6528_16.conda#b7234c329d4503600b032f168f4b65e7 https://conda.anaconda.org/conda-forge/osx-64/clang-16.0.6-hdae98eb_5.conda#5f020dce5a00342141d87f952c9c0282 -https://conda.anaconda.org/conda-forge/osx-64/coverage-7.4.1-py312h41838bb_0.conda#4a89ca53df4faeca1b88d63f12267433 -https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.48.1-py312h41838bb_0.conda#f7db6992aa780fca60ec35fb2cfe012f +https://conda.anaconda.org/conda-forge/osx-64/coverage-7.4.3-py312h41838bb_1.conda#996062eaf469432f208a7573c2482e3d +https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.49.0-py312h41838bb_0.conda#910043c784378419df3160b7661ee915 https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-20_osx64_mkl.conda#160fdc97a51d66d51dc782fb67d35205 https://conda.anaconda.org/conda-forge/osx-64/mkl-devel-2023.2.0-h694c41f_50500.conda#1b4d0235ef253a1e19459351badf4f9f -https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.0-pyhd8ed1ab_0.conda#5ba1cc5b924226349d4a49fb547b7579 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.2-pyhd8ed1ab_0.conda#40bd3ef942b9642a3eb20b0bbf92469b https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 https://conda.anaconda.org/conda-forge/osx-64/clangxx-16.0.6-default_h7151d67_5.conda#8c3fb5d2005174683f3958383643e335 https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-20_osx64_mkl.conda#51089a4865eb4aec2bc5c7468bd07f9f @@ -105,14 +105,14 @@ https://conda.anaconda.org/conda-forge/osx-64/numpy-1.26.4-py312he3a82b2_0.conda https://conda.anaconda.org/conda-forge/osx-64/blas-devel-3.9.0-20_osx64_mkl.conda#cc3260179093918b801e373c6e888e02 https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-16.0.6-ha38d28d_2.conda#3b9e8c5c63b8e86234f499490acd85c2 https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.2.0-py312hbf0bb39_0.conda#74190e06053cda7139a0cb71f3e618fd -https://conda.anaconda.org/conda-forge/osx-64/pandas-2.2.0-py312h83c8a23_0.conda#b5a2e09aa631f35983fe291fcc340f6e +https://conda.anaconda.org/conda-forge/osx-64/pandas-2.2.1-py312h83c8a23_0.conda#c562e07382cdc3194c21b8eca06460ff https://conda.anaconda.org/conda-forge/osx-64/scipy-1.12.0-py312h8adb940_2.conda#b16a9767f5f4b0a0ec8fb566e2c586f7 https://conda.anaconda.org/conda-forge/osx-64/blas-2.120-mkl.conda#b041a7677a412f3d925d8208936cb1e2 https://conda.anaconda.org/conda-forge/osx-64/clang_impl_osx-64-16.0.6-h8787910_9.conda#36dc72f20205cf43f63765334a5f0be7 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.8.2-py312h302682c_0.conda#6a3b7c29d663a9cda13afb8f2638cc46 +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.8.3-py312h1fe5000_0.conda#5f65fc4ce880d4c795e217d563a114ec https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.0.1-py312h674694f_1.conda#e5b9c0f8b5c367467425ff34353ef761 https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-16.0.6-hb91bd55_9.conda#3ebda8406efd8c09ebeeba80396ac6bd -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.8.2-py312hb401068_0.conda#926f479dcab7d6d26bba7fe39f67e3b2 +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.8.3-py312hb401068_0.conda#7015bf84c9d39284c4746d814da2a0f1 
https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.7.0-h282daa2_0.conda#4652f33fe8d895f61177e2783b289377 https://conda.anaconda.org/conda-forge/osx-64/clangxx_impl_osx-64-16.0.6-h6d92fbe_9.conda#bfea277f004e2815ebd59294e9c08746 https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-12.3.0-h18f7dce_1.conda#436af2384c47aedb94af78a128e174f1 diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock index cb3b126678024..a5a0ae3ae0ecd 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock @@ -1,6 +1,6 @@ # Generated by conda-lock. # platform: osx-64 -# input_hash: 7dcf380a01bb97d3b32f38a0a9e6fcb11d8a71356477798b9a61987d26f479dd +# input_hash: 0852937217d7f245972202bbf4d45e87bae0b554b334e0a6a351c65ba033ae17 @EXPLICIT https://repo.anaconda.com/pkgs/main/osx-64/blas-1.0-mkl.conda#cb2c87e85ac8e0ceae776d26d4214c8a https://repo.anaconda.com/pkgs/main/osx-64/bzip2-1.0.8-h1de35cc_0.conda#19fcb113b170fe2a0be96b47801fed7d @@ -13,7 +13,7 @@ https://repo.anaconda.com/pkgs/main/osx-64/libffi-3.4.4-hecd8cb5_0.conda#c20b268 https://repo.anaconda.com/pkgs/main/osx-64/libwebp-base-1.3.2-h6c40b1e_0.conda#d8fd9f599dd4e012694e69d119016442 https://repo.anaconda.com/pkgs/main/osx-64/llvm-openmp-14.0.6-h0dcd299_0.conda#b5804d32b87dc61ca94561ade33d5f2d https://repo.anaconda.com/pkgs/main/osx-64/ncurses-6.4-hcec6c5f_0.conda#0214d1ee980e217fabc695f1e40662aa -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023d-h04d1e81_0.conda#fdb319536f351b2b828a350ffd1a35a1 +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 https://repo.anaconda.com/pkgs/main/osx-64/xz-5.4.5-h6c40b1e_0.conda#351c5d33fe551018a2068e7a2ca8a6c1 https://repo.anaconda.com/pkgs/main/osx-64/zlib-1.2.13-h4dc903c_0.conda#d0202dd912bfb45d3422786531717882 https://repo.anaconda.com/pkgs/main/osx-64/ccache-3.7.9-hf120daa_0.conda#a01515a32e721c51d631283f991bc8ea @@ -60,7 +60,7 @@ https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.3.3-py312h6c40b1e_0.conda#49173b5a36c9134865221f29d4a73fb6 https://repo.anaconda.com/pkgs/main/osx-64/wheel-0.41.2-py312hecd8cb5_0.conda#e7aea266d81142e2bb0bbc2280e64526 https://repo.anaconda.com/pkgs/main/noarch/fonttools-4.25.0-pyhd3eb1b0_0.conda#bb9c5b5a6d892fca5efe4bf0203b6a48 -https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.26.3-py312h6f81483_0.conda#58c3bc6c19210583249b16d69f9bdb0a +https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.26.4-py312h6f81483_0.conda#87f73efbf26ab2e2ea7c32481a71bd47 https://repo.anaconda.com/pkgs/main/osx-64/pillow-10.2.0-py312h6c40b1e_0.conda#5a44bd28cf26fff2d6219e76a86db126 https://repo.anaconda.com/pkgs/main/osx-64/pip-23.3.1-py312hecd8cb5_0.conda#efc3db40cac09f74bb480d28d3a0b260 https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.4.0-py312hecd8cb5_0.conda#b816a2439ba9b87524aec74d58e55b0a @@ -73,7 +73,7 @@ https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.8.0-py312hecd8cb5_0.cond https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-base-3.8.0-py312h7f12edd_0.conda#bda389e5a1ff69f763911cf90102893b https://repo.anaconda.com/pkgs/main/osx-64/mkl_fft-1.3.8-py312h6c40b1e_0.conda#d59d01b940493f2b6a84aac922fd0c76 https://repo.anaconda.com/pkgs/main/osx-64/mkl_random-1.2.4-py312ha357a0b_0.conda#c1ea9c8eee79a5af3399f3c31be0e9c6 
-https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.26.3-py312hac873b0_0.conda#d2310a3607112d4b042330d0140434ef +https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.26.4-py312hac873b0_0.conda#3150bac1e382156f82a153229e1ebd06 https://repo.anaconda.com/pkgs/main/osx-64/numexpr-2.8.7-py312hac873b0_0.conda#6303ba071636ef57fddf69eb6f440ec1 https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.11.4-py312h81688c2_0.conda#7d57b4c21a9261f97fa511e0940c5d93 https://repo.anaconda.com/pkgs/main/osx-64/pandas-2.1.4-py312he282a81_0.conda#dcbed31bc94e03cc6f53312e0fb4eb49 diff --git a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock index c74a207d4c8ba..3c4332c12ab6a 100644 --- a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock @@ -5,7 +5,7 @@ https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.12.12-h06a4308_0.conda#12bf7315c3f5ca50300e8b48d1b4ef2e https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023d-h04d1e81_0.conda#fdb319536f351b2b828a350ffd1a35a1 +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 @@ -32,7 +32,7 @@ https://repo.anaconda.com/pkgs/main/linux-64/pip-23.3.1-py39h06a4308_0.conda#685 # pip docutils @ https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl#sha256=96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 # pip exceptiongroup @ https://files.pythonhosted.org/packages/b8/9a/5028fd52db10e600f1c4674441b968cf2ea4959085bfb5b99fb1250e5f68/exceptiongroup-1.2.0-py3-none-any.whl#sha256=4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14 # pip execnet @ https://files.pythonhosted.org/packages/e8/9c/a079946da30fac4924d92dbc617e5367d454954494cf1e71567bcc4e00ee/execnet-2.0.2-py3-none-any.whl#sha256=88256416ae766bc9e8895c76a87928c0012183da3cc4fc18016e6f050e025f41 -# pip fonttools @ https://files.pythonhosted.org/packages/ef/02/1e18cc5249b2e9cdd1d6c231373c4ba7ad18ff3ac9164b1ffcac6ed0aa35/fonttools-4.48.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=c900508c46274d32d308ae8e82335117f11aaee1f7d369ac16502c9a78930b0a +# pip fonttools @ https://files.pythonhosted.org/packages/d8/d7/0f4563ea45c14c84fde44aca3cb0896e49d1d960ba1298e789b75b1d2625/fonttools-4.49.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=1f255ce8ed7556658f6d23f6afd22a6d9bbc3edb9b96c96682124dc487e1bf42 # pip idna @ https://files.pythonhosted.org/packages/c2/e7/a82b05cf63a603df6e68d59ae6a68bf5064484a0718ea5033660af4b54a9/idna-3.6-py3-none-any.whl#sha256=c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f # pip imagesize @ 
https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b # pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 @@ -57,25 +57,25 @@ https://repo.anaconda.com/pkgs/main/linux-64/pip-23.3.1-py39h06a4308_0.conda#685 # pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/80/b3/1beac14a88654d2e5120d0143b49be5ad450b86eb1963523d8dbdcc51eb2/sphinxcontrib_qthelp-1.0.7-py3-none-any.whl#sha256=e2ae3b5c492d58fcbd73281fbd27e34b8393ec34a073c792642cd8e529288182 # pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/38/24/228bb903ea87b9e08ab33470e6102402a644127108c7117ac9c00d849f82/sphinxcontrib_serializinghtml-1.1.10-py3-none-any.whl#sha256=326369b8df80a7d2d8d7f99aa5ac577f51ea51556ed974e7716cfd4fca3f6cb7 # pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f -# pip threadpoolctl @ https://files.pythonhosted.org/packages/81/12/fd4dea011af9d69e1cad05c75f3f7202cdcbeac9b712eea58ca779a72865/threadpoolctl-3.2.0-py3-none-any.whl#sha256=2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032 +# pip threadpoolctl @ https://files.pythonhosted.org/packages/b1/2c/f504e55d98418f2fcf756a56877e6d9a45dd5ed28b3d7c267b300e85ad5b/threadpoolctl-3.3.0-py3-none-any.whl#sha256=6155be1f4a39f31a18ea70f94a77e0ccd57dced08122ea61109e7da89883781e # pip tomli @ https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl#sha256=939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc # pip tzdata @ https://files.pythonhosted.org/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl#sha256=9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252 -# pip urllib3 @ https://files.pythonhosted.org/packages/88/75/311454fd3317aefe18415f04568edc20218453b709c63c58b9292c71be17/urllib3-2.2.0-py3-none-any.whl#sha256=ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224 +# pip urllib3 @ https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.whl#sha256=450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d # pip zipp @ https://files.pythonhosted.org/packages/d9/66/48866fc6b158c81cc2bfecc04c480f105c6040e8b077bc54c634b4a67926/zipp-3.17.0-py3-none-any.whl#sha256=0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 # pip contourpy @ https://files.pythonhosted.org/packages/a9/ba/d8fd1380876f1e9114157606302e3644c85f6d116aeba354c212ee13edc7/contourpy-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=11f8d2554e52f459918f7b8e6aa20ec2a3bce35ce95c1f0ef4ba36fbda306df5 -# pip coverage @ https://files.pythonhosted.org/packages/ff/e3/351477165426da841458f2c1b732360dd42da140920e3cd4b70676e5b77f/coverage-7.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=d12c923757de24e4e2110cf8832d83a886a4cf215c6e61ed506006872b43a6d1 +# pip coverage @ 
https://files.pythonhosted.org/packages/b5/ad/effc12b8f72321cb847c5ba7f4ea7ce3e5c19c641f6418131f8fb0ab2f61/coverage-7.4.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=8640f1fde5e1b8e3439fe482cdc2b0bb6c329f4bb161927c28d2e8879c6029ee # pip imageio @ https://files.pythonhosted.org/packages/02/25/66533a8390e3763cf8254dee143dbf8a830391ea60d2762512ba7f9ddfbe/imageio-2.34.0-py3-none-any.whl#sha256=08082bf47ccb54843d9c73fe9fc8f3a88c72452ab676b58aca74f36167e8ccba # pip importlib-metadata @ https://files.pythonhosted.org/packages/c0/8b/d8427f023c081a8303e6ac7209c16e6878f2765d5b59667f3903fbcfd365/importlib_metadata-7.0.1-py3-none-any.whl#sha256=4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e -# pip importlib-resources @ https://files.pythonhosted.org/packages/93/e8/facde510585869b5ec694e8e0363ffe4eba067cb357a8398a55f6a1f8023/importlib_resources-6.1.1-py3-none-any.whl#sha256=e8bf90d8213b486f428c9c39714b920041cb02c184686a3dee24905aaa8105d6 +# pip importlib-resources @ https://files.pythonhosted.org/packages/ba/0b/27d13042335942abc29a87f49f1ce6b56fa58e025e96454ef25929aeb603/importlib_resources-6.1.2-py3-none-any.whl#sha256=9a0a862501dc38b68adebc82970140c9e4209fc99601782925178f8386339938 # pip jinja2 @ https://files.pythonhosted.org/packages/30/6d/6de6be2d02603ab56e72997708809e8a5b0fbfee080735109b40a3564843/Jinja2-3.1.3-py3-none-any.whl#sha256=7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa -# pip pytest @ https://files.pythonhosted.org/packages/c7/10/727155d44c5e04bb08e880668e53079547282e4f950535234e5a80690564/pytest-8.0.0-py3-none-any.whl#sha256=50fb9cbe836c3f20f0dfa99c565201fb75dc54c8d76373cd1bde06b06657bdb6 +# pip pytest @ https://files.pythonhosted.org/packages/a7/ea/d0ab9595a0d4b2320483e634123171deaf50885e29d442180efcbf2ed0b2/pytest-8.0.2-py3-none-any.whl#sha256=edfaaef32ce5172d5466b5127b42e0d6d35ebbe4453f0e3505d96afd93f6b096 # pip python-dateutil @ https://files.pythonhosted.org/packages/36/7a/87837f39d0296e723bb9b62bbb257d0355c7f6128853c78955f57342a56d/python_dateutil-2.8.2-py2.py3-none-any.whl#sha256=961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # pip requests @ https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f # pip scipy @ https://files.pythonhosted.org/packages/a6/9d/f864266894b67cdb5731ab531afba68713da3d6d8252f698ccab775d3f68/scipy-1.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=6546dc2c11a9df6926afcbdd8a3edec28566e4e785b915e849348c6dd9f3f490 -# pip tifffile @ https://files.pythonhosted.org/packages/16/09/b9f5e4f9448fd39b7c0c9cbb592409ab28e90a1913795260b975d8424cde/tifffile-2024.1.30-py3-none-any.whl#sha256=40cb48f661acdfea16cb00dc8941bd642b8eb5c59bca6de6a54091bee9ee2699 +# pip tifffile @ https://files.pythonhosted.org/packages/cd/0b/33610b4d0d1bb83a6bfd20ed838f52e02a44e9b439116cd4f3d424e81a80/tifffile-2024.2.12-py3-none-any.whl#sha256=870998f82fbc94ff7c3528884c1b0ae54863504ff51dbebea431ac3fa8fb7c21 # pip lightgbm @ https://files.pythonhosted.org/packages/ba/11/cb8b67f3cbdca05b59a032bb57963d4fe8c8d18c3870f30bed005b7f174d/lightgbm-4.3.0-py3-none-manylinux_2_28_x86_64.whl#sha256=104496a3404cb2452d3412cbddcfbfadbef9c372ea91e3a9b8794bcc5183bf07 -# pip matplotlib @ 
https://files.pythonhosted.org/packages/53/1f/653d60d2ec81a6095fa3e571cf2de57742bab8a51a5c01de26730ce3dc53/matplotlib-3.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5864bdd7da445e4e5e011b199bb67168cdad10b501750367c496420f2ad00843 -# pip pandas @ https://files.pythonhosted.org/packages/df/bc/663c52528d6b2c796d0f788655e5f0fd65842523715a18f4d4beaca8dcb2/pandas-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=eb61dc8567b798b969bcc1fc964788f5a68214d333cade8319c7ab33e2b5d88a +# pip matplotlib @ https://files.pythonhosted.org/packages/35/82/ca05c3e3ec4a38eaf49a9bfa1a700658284ddaaa2e2523fa91fbb96d207a/matplotlib-3.8.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=6728dde0a3997396b053602dbd907a9bd64ec7d5cf99e728b404083698d3ca01 +# pip pandas @ https://files.pythonhosted.org/packages/1a/5e/71bb0eef0dc543f7516d9ddeca9ee8dc98207043784e3f7e6c08b4a6b3d9/pandas-2.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f9d3558d263073ed95e46f4650becff0c5e1ffe0fc3a015de3c79283dfbdb3df # pip pyamg @ https://files.pythonhosted.org/packages/35/1c/8b2aa6fbb2bae258ab6cdb35b09635bf50865ac2bcdaf220db3d972cc0d8/pyamg-5.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=1332acec6d5ede9440c8ced0ef20952f5b766387116f254b79880ce29fdecee7 # pip pytest-cov @ https://files.pythonhosted.org/packages/a7/4b/8b78d126e275efa2379b1c2e09dc52cf70df16fc3b90613ef82531499d73/pytest_cov-4.1.0-py3-none-any.whl#sha256=6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a # pip pytest-xdist @ https://files.pythonhosted.org/packages/50/37/125fe5ec459321e2d48a0c38672cfc2419ad87d580196fd894e5f25230b0/pytest_xdist-3.5.0-py3-none-any.whl#sha256=d075629c7e00b611df89f490a5063944bee7a4362a5ff11c7cc7824a03dfce24 diff --git a/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock b/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock index 560605a638883..3627639a05147 100644 --- a/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock +++ b/build_tools/azure/pymin_conda_defaults_openblas_linux-64_conda.lock @@ -7,12 +7,13 @@ https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-openblas.conda#9ddfcaef10d https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.12.12-h06a4308_0.conda#12bf7315c3f5ca50300e8b48d1b4ef2e https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b https://repo.anaconda.com/pkgs/main/linux-64/libgfortran5-11.2.0-h1234567_1.conda#36a01a8c30e0cadf0d3e842c50b73f3b -https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023d-h04d1e81_0.conda#fdb319536f351b2b828a350ffd1a35a1 +https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25 https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-11.2.0-h00389a5_1.conda#7429b67ab7b1d7cb99b9d1f3ddaec6e3 https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 +https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h7b6447c_0.conda#9303f4af7c004e069bae22bde8d800ee 
https://repo.anaconda.com/pkgs/main/linux-64/expat-2.5.0-h6a678d5_0.conda#9a21d99d49a0a556cf9590430dec8ec0 https://repo.anaconda.com/pkgs/main/linux-64/fftw-3.3.9-h27cfd23_1.conda#d266674fbd3345d45a69896e1bdef8be https://repo.anaconda.com/pkgs/main/linux-64/icu-73.1-h6a678d5_0.conda#6d09df641fc23f7d277a04dc7ea32dd4 @@ -20,6 +21,7 @@ https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9e-h5eee18b_1.conda#ac373800fd https://repo.anaconda.com/pkgs/main/linux-64/lerc-3.0-h295c915_0.conda#b97309770412f10bed8d9448f6f98f87 https://repo.anaconda.com/pkgs/main/linux-64/libdeflate-1.17-h5eee18b_1.conda#82831ef0b6c9595382d74e0c281f6742 https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339 +https://repo.anaconda.com/pkgs/main/linux-64/libiconv-1.16-h7f8727e_2.conda#80d4bc7d7e58b5f0be41d763f60994f5 https://repo.anaconda.com/pkgs/main/linux-64/libopenblas-0.3.21-h043d6bf_0.conda#7f7324dcc3c4761a14f3e4ac443235a7 https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.3.2-h5eee18b_0.conda#9179fc7baefa1e027f572edbc519d805 @@ -27,30 +29,28 @@ https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.15-h7f8727e_0.conda#ada518 https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.4-h6a678d5_0.conda#53915e9402180a7f22ea619c41089520 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.13-h7f8727e_0.conda#c73d46a4d666da0ae3dcd3fd8f805122 -https://repo.anaconda.com/pkgs/main/linux-64/pcre-8.45-h295c915_0.conda#b32ccc24d1d9808618c1e898da60f68d https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.5-h5eee18b_0.conda#fb0f709ab3eb6ad3538677c327646581 https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e -https://repo.anaconda.com/pkgs/main/linux-64/glib-2.69.1-he621ea3_2.conda#51cf1899782b3f3744aedd143fbc07f3 https://repo.anaconda.com/pkgs/main/linux-64/libcups-2.4.2-h2d74bed_1.conda#3f265c2172a9e8c90a74037b6fa13685 https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20230828-h5eee18b_0.conda#850eb5a9d2d7d3c66cce12e84406ca08 https://repo.anaconda.com/pkgs/main/linux-64/libllvm14-14.0.6-hdb19cb5_3.conda#aefea2b45cf32f12b4f1ffaa70aa3201 https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.39-h5eee18b_0.conda#f6aee38184512eb05b06c2e94d39ab22 https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.10.4-hf1b16e4_1.conda#e87849ce513f9968794f20bba620e6a4 +https://repo.anaconda.com/pkgs/main/linux-64/pcre2-10.42-hebb0a14_0.conda#fca6dea6ce1eddd0876a024f62c5097a https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.5.5-hc292b87_0.conda#0f59d57dc21f585f4c282d60dfb46505 -https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.18-hb2f20db_0.conda#6a6a6f1391f807847404344489ef6cf4 https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.12.1-h4a9f257_0.conda#bdc7b5952e9c5dca01bc2f4ccef2f974 -https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.1-h5eee18b_1.conda#f2f26e6f869b5d87f41bd059fae47c3e 
https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.20.1-h143b758_1.conda#cf1accc86321fa25d6b978cc748039ae https://repo.anaconda.com/pkgs/main/linux-64/libclang13-14.0.6-default_he11475f_1.conda#44890feda1cf51639d9c94afbacce011 +https://repo.anaconda.com/pkgs/main/linux-64/libglib-2.78.4-hdc74915_0.conda#2f6d27741e931d5b6ba56e1a1312aaf0 https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.5.1-h6a678d5_0.conda#235a671f74f0c4ecad9f9b3b107e3566 https://repo.anaconda.com/pkgs/main/linux-64/libxkbcommon-1.0.1-h5eee18b_1.conda#888b2e8f1bbf21017c503826e2d24b50 https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5 https://repo.anaconda.com/pkgs/main/linux-64/cyrus-sasl-2.1.28-h52b45da_1.conda#d634af1577e4008f9228ae96ce671c44 https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.14.1-h4c34cd2_2.conda#f0b472f5b544f8d57beb09ed4a2932e1 -https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.1-h6a678d5_1.conda#afd9cbe949d670d24cc0a007aaec1fe1 +https://repo.anaconda.com/pkgs/main/linux-64/glib-tools-2.78.4-h6a678d5_0.conda#3dbe6227cd59818dca9afb75ccb70708 https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda#719db47afba9f6586eecb5eacac70bff https://repo.anaconda.com/pkgs/main/linux-64/libclang-14.0.6-default_hc6dbbc7_1.conda#8f12583c4027b2861cff470f6b8837c4 https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.17-hdbd6064_0.conda#6bed363e25859faff66bf546a11c10e8 @@ -60,6 +60,7 @@ https://repo.anaconda.com/pkgs/main/linux-64/certifi-2024.2.2-py39h06a4308_0.con https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab https://repo.anaconda.com/pkgs/main/linux-64/exceptiongroup-1.2.0-py39h06a4308_0.conda#960e2cb83ac5134df8e593a130aa11af https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 +https://repo.anaconda.com/pkgs/main/linux-64/glib-2.78.4-h6a678d5_0.conda#045ff487547f7b2b7ff01648681b8ebe https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 https://repo.anaconda.com/pkgs/main/linux-64/joblib-1.2.0-py39h06a4308_0.conda#ac1f5687d70aa1128cbecb26bc9e559d https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.4.4-py39h6a678d5_0.conda#3d57aedbfbd054ce57fb3c1e4448828c @@ -79,17 +80,20 @@ https://repo.anaconda.com/pkgs/main/linux-64/tomli-2.0.1-py39h06a4308_0.conda#b0 https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.3.3-py39h5eee18b_0.conda#9c4bd985bb8adcd12f47e790e95a9333 https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.41.2-py39h06a4308_0.conda#ec1b8213c3585defaa6042ed2f95861d https://repo.anaconda.com/pkgs/main/linux-64/coverage-7.2.2-py39h5eee18b_0.conda#e9da151b7e1f56be2cb569c65949a1d2 +https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.18-hb2f20db_0.conda#6a6a6f1391f807847404344489ef6cf4 +https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.1-h5eee18b_1.conda#f2f26e6f869b5d87f41bd059fae47c3e https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.21.6-py39hac523dd_0.conda#a03c1fe16cf2558bca3838062c334d7d https://repo.anaconda.com/pkgs/main/linux-64/pip-23.3.1-py39h06a4308_0.conda#685007e3dae59d211620f19926577bd6 https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.4.0-py39h06a4308_0.conda#99d92a7a39f7e615de84f8cc5606c49a https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 
-https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h53bd1ea_10.conda#bd0c79e82df6323f638bdcb871891b61 https://repo.anaconda.com/pkgs/main/linux-64/sip-6.7.12-py39h6a678d5_0.conda#6988a3e12fcacfedcac523c1e4c3167c +https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.1-h6a678d5_1.conda#afd9cbe949d670d24cc0a007aaec1fe1 https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.3.4-py39h62a2d02_0.conda#dbab28222c740af8e21a3e5e2882c178 -https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.10-py39h6a678d5_0.conda#52da5ff9b1144b078d2f41bab0b213f2 https://repo.anaconda.com/pkgs/main/linux-64/pytest-cov-4.1.0-py39h06a4308_1.conda#8f41fce21670b120bf7fa8a7883380d9 https://repo.anaconda.com/pkgs/main/linux-64/pytest-xdist-3.5.0-py39h06a4308_0.conda#e1d7ffcb1ee2ed9a84800f5c4bbbd7ae https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.7.3-py39hf838250_2.conda#0667ea5ac14d35e26da19a0f068739da -https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.3.4-py39h06a4308_0.conda#384fc5e01ebfcf30e7161119d3029b5a https://repo.anaconda.com/pkgs/main/linux-64/pyamg-4.2.3-py39h79cecc1_0.conda#afc634da8b81dc504179d53d334e6e55 +https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h53bd1ea_10.conda#bd0c79e82df6323f638bdcb871891b61 +https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.10-py39h6a678d5_0.conda#52da5ff9b1144b078d2f41bab0b213f2 +https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.3.4-py39h06a4308_0.conda#384fc5e01ebfcf30e7161119d3029b5a # pip cython @ https://files.pythonhosted.org/packages/c1/a7/606c4414a46d589114bf4de7eebeea315aae68283de095dd3e949d9c96d8/Cython-3.0.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=de892422582f5758bd8de187e98ac829330ec1007bc42c661f687792999988a7 diff --git a/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock b/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock index 751a06ec45efd..62a8c473962f5 100644 --- a/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock +++ b/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock @@ -36,7 +36,7 @@ https://conda.anaconda.org/conda-forge/win-64/gettext-0.21.1-h5728263_0.tar.bz2# https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.2-heb0366b_0.conda#6e8b0f22b4eef3b3cb3849bb4c3d47f9 https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.1.0-hcfcfb64_1.conda#19ce3e1dacc7912b3d6ff40690ba9ae0 https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.1.0-hcfcfb64_1.conda#71e890a0b361fd58743a13f77e1506b7 -https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.42-h19919ed_0.conda#9d97d0e6a5d51a7fd03c3398bc752890 +https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.43-h19919ed_0.conda#77e398acc32617a0384553aea29e866b https://conda.anaconda.org/conda-forge/win-64/libvorbis-1.3.7-h0e60522_0.tar.bz2#e1a22282de0169c93e4ffe6ce6acc212 https://conda.anaconda.org/conda-forge/win-64/libxml2-2.12.5-hc3477c8_0.conda#d8c3c1c8242db352f38cd1dc0bf44f77 https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2#fe759119b8b3bfa720b8762c6fdc35de @@ -54,7 +54,7 @@ https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-hdaf720e_2.conda#3 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.5-py39h1f6ef14_1.conda#4fc5bd0a7b535252028c647cc27d6c87 https://conda.anaconda.org/conda-forge/win-64/libclang13-15.0.7-default_h85b4d89_4.conda#c6b0181860717a08469a324c4180ff2d 
-https://conda.anaconda.org/conda-forge/win-64/libglib-2.78.3-h16e383f_0.conda#c295badd19494ac8476b36e9e9e47ace +https://conda.anaconda.org/conda-forge/win-64/libglib-2.78.4-h16e383f_0.conda#72dc4e1cdde0894015567c90f9c4e261 https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.9.3-default_haede6df_1009.conda#87da045f6d26ce9fe20ad76a18f6a18a https://conda.anaconda.org/conda-forge/win-64/libtiff-4.6.0-h6e2ebb7_2.conda#08d653b74ee2dec0131ad4259ffbb126 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 @@ -63,20 +63,20 @@ https://conda.anaconda.org/conda-forge/noarch/pluggy-1.4.0-pyhd8ed1ab_0.conda#13 https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2#a1f820480193ea83582b13249a7e7bd9 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.0.3-pyhd8ed1ab_0.conda#40695fdfd15a92121ed2922900d0308b +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.1-pyhd8ed1ab_0.conda#576de899521b7d43674ba3ef6eae9142 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.3.0-pyhc1e730c_0.conda#698d2d2b621640bddb9191f132967c9f https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/win-64/tornado-6.3.3-py39ha55989b_1.conda#f00d59c26ab0fc20b1923270397cbba5 +https://conda.anaconda.org/conda-forge/win-64/tornado-6.4-py39ha55989b_0.conda#d8f52e8e1d02f9a5901f9224e2ddf98f https://conda.anaconda.org/conda-forge/win-64/unicodedata2-15.1.0-py39ha55989b_0.conda#20ec896e8d97f2ff8be1124e624dc8f2 https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda#1cdea58981c5cbc17b51973bcaddcea7 https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.11-hcd874cb_0.conda#c46ba8712093cb0114404ae8a7582e1a https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.3-hcd874cb_0.tar.bz2#46878ebb6b9cbd8afcf8088d7ef00ece https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-hcfcfb64_1.conda#f47f6db2528e38321fb00ae31674c133 -https://conda.anaconda.org/conda-forge/win-64/coverage-7.4.1-py39ha55989b_0.conda#6873406c3c78cd79dd60246a71934806 -https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.78.3-h12be248_0.conda#03c45e65dbac2ba6c247dfd4896b664c +https://conda.anaconda.org/conda-forge/win-64/coverage-7.4.3-py39ha55989b_1.conda#c68e9c43ed91b369a592d5268c9dac71 +https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.78.4-h12be248_0.conda#9e2a4c1cace3fbdeb11f20578484ddaf https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.1.1-pyhd8ed1ab_0.conda#3d5fa25cf42f3f32a12b2d874ace8574 https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc https://conda.anaconda.org/conda-forge/win-64/lcms2-2.16-h67d730c_0.conda#d3592435917b62a8becff3a60db674f6 @@ -84,12 +84,12 @@ 
https://conda.anaconda.org/conda-forge/win-64/libclang-15.0.7-default_hde6756a_4 https://conda.anaconda.org/conda-forge/win-64/libxcb-1.15-hcd874cb_0.conda#090d91b69396f14afef450c285f9758c https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.0-h3d672ee_3.conda#45a9628a04efb6fc326fff0a8f47b799 https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 -https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.0-pyhd8ed1ab_0.conda#5ba1cc5b924226349d4a49fb547b7579 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.2-pyhd8ed1ab_0.conda#40bd3ef942b9642a3eb20b0bbf92469b https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 https://conda.anaconda.org/conda-forge/win-64/sip-6.7.12-py39h99910a6_0.conda#0cc5774390ada632ed7975203057c91c https://conda.anaconda.org/conda-forge/win-64/tbb-2021.11.0-h91493d7_1.conda#21069f3ed16812f9f4f2700667b6ec86 -https://conda.anaconda.org/conda-forge/win-64/fonttools-4.48.1-py39ha55989b_0.conda#f87a0e69dd4ccf075f9a4010b4f724c1 -https://conda.anaconda.org/conda-forge/win-64/glib-2.78.3-h12be248_0.conda#a14440f1d004a2ddccd9c1354dbeffdf +https://conda.anaconda.org/conda-forge/win-64/fonttools-4.49.0-py39ha55989b_0.conda#3db31ee7eada607a636bd6d6105f7919 +https://conda.anaconda.org/conda-forge/win-64/glib-2.78.4-h12be248_0.conda#0080f150ed83685497f841f4b70fca1f https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.1.1-pyhd8ed1ab_0.conda#d04bd1b5bed9177dd7c3cef15e2b6710 https://conda.anaconda.org/conda-forge/win-64/mkl-2024.0.0-h66d3029_49657.conda#006b65d9cd436247dfe053df772e041d https://conda.anaconda.org/conda-forge/win-64/pillow-10.2.0-py39h368b509_0.conda#706d6e5bbc4b5d2ac7b8a6077319294d @@ -104,11 +104,11 @@ https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-21_win64_mkl.conda# https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-21_win64_mkl.conda#c4740f091cb75987390087934354a621 https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-21_win64_mkl.conda#a4844669ed07bb5b7f182e9ca4de2a70 https://conda.anaconda.org/conda-forge/win-64/numpy-1.26.4-py39hddb5d58_0.conda#6e30ff8f2d3f59f45347dfba8bc22a04 -https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.8-h9e85ed6_18.conda#8427460072b90560c0675c37c30386ef +https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.8-h9e85ed6_19.conda#1e5fa5b05768a8eed9d8bb0bf5585b1f https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-21_win64_mkl.conda#dfb57411138b9548b9d6c65f7fe6af32 https://conda.anaconda.org/conda-forge/win-64/contourpy-1.2.0-py39h1f6ef14_0.conda#9eeea323eacb6549cbb3df3d81181cb2 https://conda.anaconda.org/conda-forge/win-64/pyqt-5.15.9-py39hb77abff_5.conda#5ed899124a51958336371ff01482b8fd https://conda.anaconda.org/conda-forge/win-64/scipy-1.12.0-py39hddb5d58_2.conda#e421d27a09f9131514436f8233125766 https://conda.anaconda.org/conda-forge/win-64/blas-2.121-mkl.conda#87ae78b8197b890e5a429f91769dfac7 -https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.8.2-py39hf19769e_0.conda#90a864bf689259d6a08a0c55037fd69c -https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.8.2-py39hcbf5309_0.conda#92625f78e662841feb70511ff466207c +https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.8.3-py39hf19769e_0.conda#e7a42adb568586ff4035d7ef2d06c4b1 +https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.8.3-py39hcbf5309_0.conda#a4b5946f68ecaed034fa849b8d639e63 diff --git 
a/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock index 59abc098bdb28..d6a6b0aece42f 100644 --- a/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock +++ b/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock @@ -63,8 +63,8 @@ https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2. https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.42-h2797004_0.conda#d67729828dc6ff7ba44a61062ad79880 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.48-h71f35ed_0.conda#4d18d86916705d352d5f4adfb7f0edd3 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.1-h2797004_0.conda#fc4ccadfbf6d4784de88c41704792562 https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c @@ -80,7 +80,7 @@ https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.cond https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.3-h783c2da_0.conda#9bd06b12bbfa6fd1740fd23af4b0f0c7 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.4-h783c2da_0.conda#d86baf8740d1a906b9716f2a0bac2f2d https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.26-pthreads_h413a1c8_0.conda#760ae35415f5ba8b15d09df5afe8b23a @@ -88,7 +88,7 @@ https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.cond https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-ha9c0a0a_2.conda#55ed21669b2015f77c180feb1dd41930 https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-17.0.6-h4dfa4b3_0.conda#c1665f9c1c9f6c93d8b4e492a6a39056 https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_6.conda#e87530d1b12dd7f4e0f856dc07358d60 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.97-h1d7d5a4_0.conda#b916d71a3032416e3f9136090d814472 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.98-h1d7d5a4_0.conda#54b56c2fdf973656b748e0378900ec13 https://conda.anaconda.org/conda-forge/linux-64/python-3.9.18-h0755675_1_cpython.conda#255a7002aeec7a067ff19b545aca6328 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 
https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 @@ -109,7 +109,7 @@ https://conda.anaconda.org/conda-forge/linux-64/docutils-0.20.1-py39hf3d152e_3.c https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda#8d652ea2ee8eaee02ed8dc820bc794aa https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.3-hfc55251_0.conda#41d2f46e0ac8372eeb959860713d9b21 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.4-hfc55251_0.conda#d184ba1bf15a2bbb3be6118c90fd487d https://conda.anaconda.org/conda-forge/noarch/idna-3.6-pyhd8ed1ab_0.conda#1a76f09108576397c41c0b0c5bd84134 https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 @@ -130,17 +130,17 @@ https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd715 https://conda.anaconda.org/conda-forge/noarch/pygments-2.17.2-pyhd8ed1ab_0.conda#140a7f159396547e9799aa98f9f0742e https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.4-pyhd8ed1ab_0.conda#c79cacf8a06a51552fc651652f170208 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.0.3-pyhd8ed1ab_0.conda#40695fdfd15a92121ed2922900d0308b +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.1-pyhd8ed1ab_0.conda#576de899521b7d43674ba3ef6eae9142 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.3.0-pyhc1e730c_0.conda#698d2d2b621640bddb9191f132967c9f https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py39hd1e30aa_1.conda#cbe186eefb0bcd91e8f47c3908489874 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88 https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8 
https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e @@ -149,8 +149,8 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_ https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.48.1-py39hd1e30aa_0.conda#402ef3d9608c7653187a3fd6fd45b445 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.3-hfc55251_0.conda#e08e51acc7d1ae8dbe13255e7b4c64ac +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.49.0-py39hd1e30aa_0.conda#dd1b02484cc8c31d4093111a82b6efb2 +https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.4-hfc55251_0.conda#f36a7b2420c3fc3c48a3d609841d8fee https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.0.1-pyha770c72_0.conda#746623a787e06191d80a2133e5daff17 https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.1.1-pyhd8ed1ab_0.conda#3d5fa25cf42f3f32a12b2d874ace8574 https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.3-pyhd8ed1ab_0.conda#e7d8df6509ba635247ff9aea31134262 @@ -161,10 +161,10 @@ https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-21_linux64_openb https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.6.0-hd429924_1.conda#1dbcc04604fdf1e526e6d1b0b6938396 https://conda.anaconda.org/conda-forge/linux-64/pillow-10.2.0-py39had0adad_0.conda#2972754dc054bb079d1d121918b5126f https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_5.conda#ac902ff3c1c6d750dd0dfc93a974ab74 -https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.0-pyhd8ed1ab_0.conda#5ba1cc5b924226349d4a49fb547b7579 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.2-pyhd8ed1ab_0.conda#40bd3ef942b9642a3eb20b0bbf92469b https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.0-pyhd8ed1ab_0.conda#6a7e0694921f668a030d52f0c47baebd +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.9-h98fc4e7_0.conda#bcc7157b06fce7f5e055402a8135dfd8 https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.3.0-h3d44ed6_0.conda#5a6f6c00ef982a9bc83558d9ac8f64a0 https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.1.1-pyhd8ed1ab_0.conda#d04bd1b5bed9177dd7c3cef15e2b6710 @@ -176,14 +176,14 @@ https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-21_linux64_openblas.conda#77cefbfb4d47ba8cafef8e3f768a4538 https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.0-py39h7633fee_0.conda#ed71ad3e30eb03da363fb797419cce98 https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.9-h8e1006c_0.conda#614b81f8ed66c56b640faee7076ad14a 
-https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.0-py39hddac248_0.conda#95aaa7baa61432a1ce85dedb7b86d2dd +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.1-py39hddac248_0.conda#85293a042c24a08e71b7608ee66b6134 https://conda.anaconda.org/conda-forge/linux-64/scipy-1.12.0-py39h474f0d3_2.conda#6ab241b2023730f6b41712dc1b503afa https://conda.anaconda.org/conda-forge/linux-64/blas-2.121-openblas.conda#4a279792fd8861a15705516a52872eb6 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.2-py39he9076e7_0.conda#6085411aa2f0b2b801d3b46e1d3b83c5 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.3-py39he9076e7_0.conda#5456bdfe5809ebf5689eda6c808b686e https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.1-py39hda80f44_1.conda#6df47699edb4d8d3365de2d189a456bc -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h450f30e_18.conda#ef0430f8df5dcdedcaaab340b228f30c +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h5810be5_19.conda#54866f708d43002a514d0b9b0f84bc11 https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.2-py39hf3d152e_0.conda#18d40a5ada9a801cabaf5d47c15c6282 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.3-py39hf3d152e_0.conda#983f5b77540eb5aa00238e72ec9b1dfb https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.6.0-pyhd8ed1ab_0.conda#191b8a622191a403700d16a2008e4e29 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.8-pyhd8ed1ab_0.conda#611a35a27914fac3aa37611a6fe40bb5 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.6-pyhd8ed1ab_0.conda#d7e4954df0d3aea2eacc7835ad12671d diff --git a/build_tools/azure/ubuntu_atlas_lock.txt b/build_tools/azure/ubuntu_atlas_lock.txt index d2a0e285efa1b..4a17f57a74ef2 100644 --- a/build_tools/azure/ubuntu_atlas_lock.txt +++ b/build_tools/azure/ubuntu_atlas_lock.txt @@ -18,7 +18,7 @@ packaging==23.2 # via pytest pluggy==1.4.0 # via pytest -pytest==8.0.0 +pytest==8.0.2 # via # -r build_tools/azure/ubuntu_atlas_requirements.txt # pytest-xdist diff --git a/build_tools/circle/doc_linux-64_conda.lock b/build_tools/circle/doc_linux-64_conda.lock index 3a0b121e56a1c..df19726249807 100644 --- a/build_tools/circle/doc_linux-64_conda.lock +++ b/build_tools/circle/doc_linux-64_conda.lock @@ -8,7 +8,7 @@ https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda#6185f640c43843e5ad6fd1c5372c3f80 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_16.conda#7ca122655873935e02c91279c5b03c8c +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_17.conda#d731b543793afc0433c4fd593e693fce https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-12.3.0-h8bca6fd_105.conda#e12ce6b051085b8f27e239f5e5f5bce5 https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-12.3.0-h8bca6fd_105.conda#b3c6062c84a8e172555ee104ea6a01ab @@ -17,7 +17,7 @@ 
https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4 https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda#d211c42b9ce49aee3734fdc828731689 -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_16.conda#071ea8dceff4d30ac511f4a2f8437cd1 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_17.conda#595db67e32b276298ff3d94d07d47fbf https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-hf600244_0.conda#33084421a8c0af6aef1b439707f7662a https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-hdd6e379_0.conda#ccc940fddbc3fcd3d79cd4c654c4b5c4 @@ -86,8 +86,8 @@ https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2. https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.42-h2797004_0.conda#d67729828dc6ff7ba44a61062ad79880 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.48-h71f35ed_0.conda#4d18d86916705d352d5f4adfb7f0edd3 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.1-h2797004_0.conda#fc4ccadfbf6d4784de88c41704792562 https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c @@ -109,14 +109,14 @@ https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-hf https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-he2b93b0_5.conda#cddba8fd94e52012abea1caad722b9c2 https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.3-h783c2da_0.conda#9bd06b12bbfa6fd1740fd23af4b0f0c7 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.4-h783c2da_0.conda#d86baf8740d1a906b9716f2a0bac2f2d https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.26-pthreads_h413a1c8_0.conda#760ae35415f5ba8b15d09df5afe8b23a https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-ha9c0a0a_2.conda#55ed21669b2015f77c180feb1dd41930 https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-17.0.6-h4dfa4b3_0.conda#c1665f9c1c9f6c93d8b4e492a6a39056 
https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_6.conda#e87530d1b12dd7f4e0f856dc07358d60 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.97-h1d7d5a4_0.conda#b916d71a3032416e3f9136090d814472 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.98-h1d7d5a4_0.conda#54b56c2fdf973656b748e0378900ec13 https://conda.anaconda.org/conda-forge/linux-64/python-3.9.18-h0755675_1_cpython.conda#255a7002aeec7a067ff19b545aca6328 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 @@ -139,7 +139,7 @@ https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#6 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h499e0f7_2.conda#0558a8c44eb7a18e6682bd3a8ae6dcab https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h7fe76b4_2.conda#3a749210487c0358b6f135a648cbbf60 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.3-hfc55251_0.conda#41d2f46e0ac8372eeb959860713d9b21 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.4-hfc55251_0.conda#d184ba1bf15a2bbb3be6118c90fd487d https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h8d2909c_2.conda#673bac341be6b90ef9e8abae7e52ca46 https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-h8a814eb_2.conda#f517b1525e9783849bd56a5dc45a9960 https://conda.anaconda.org/conda-forge/noarch/idna-3.6-pyhd8ed1ab_0.conda#1a76f09108576397c41c0b0c5bd84134 @@ -166,19 +166,19 @@ https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.8-py39hd1e30aa_0.cond https://conda.anaconda.org/conda-forge/noarch/pygments-2.17.2-pyhd8ed1ab_0.conda#140a7f159396547e9799aa98f9f0742e https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.4-pyhd8ed1ab_0.conda#c79cacf8a06a51552fc651652f170208 +https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad -https://conda.anaconda.org/conda-forge/noarch/setuptools-69.0.3-pyhd8ed1ab_0.conda#40695fdfd15a92121ed2922900d0308b +https://conda.anaconda.org/conda-forge/noarch/setuptools-69.1.1-pyhd8ed1ab_0.conda#576de899521b7d43674ba3ef6eae9142 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.3-pyhd8ed1ab_0.conda#1482e77f87c6a702a7e05ef22c9b197b -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.3.0-pyhc1e730c_0.conda#698d2d2b621640bddb9191f132967c9f https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py39hd1e30aa_1.conda#cbe186eefb0bcd91e8f47c3908489874 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.9.0-pyha770c72_0.conda#a92a6440c3fe7052d63244f3aba2a4a7 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.10.0-pyha770c72_0.conda#16ae769069b380646c47142d719ef466 https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8 https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda#1cdea58981c5cbc17b51973bcaddcea7 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 @@ -190,9 +190,9 @@ https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#96 https://conda.anaconda.org/conda-forge/linux-64/brunsli-0.1-h9c3ff4c_0.tar.bz2#c1ac6229d0bfd14f8354ff9ad2a26cad https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f907bb958910dc404647326ca80c263e https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.7.0-h00ab1b0_0.conda#b4537c98cb59f8725b0e1e65816b4a28 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.48.1-py39hd1e30aa_0.conda#402ef3d9608c7653187a3fd6fd45b445 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.49.0-py39hd1e30aa_0.conda#dd1b02484cc8c31d4093111a82b6efb2 https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.7.0-heb67821_0.conda#7ef7c0f111dad1c8006504a0f1ccd820 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.3-hfc55251_0.conda#e08e51acc7d1ae8dbe13255e7b4c64ac +https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.4-hfc55251_0.conda#f36a7b2420c3fc3c48a3d609841d8fee https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.0.1-pyha770c72_0.conda#746623a787e06191d80a2133e5daff17 https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.1.1-pyhd8ed1ab_0.conda#3d5fa25cf42f3f32a12b2d874ace8574 https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.3-pyhd8ed1ab_0.conda#e7d8df6509ba635247ff9aea31134262 @@ -204,12 +204,12 @@ https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.6.0-hd429924_1.co https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b https://conda.anaconda.org/conda-forge/linux-64/pillow-10.2.0-py39had0adad_0.conda#2972754dc054bb079d1d121918b5126f https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 -https://conda.anaconda.org/conda-forge/noarch/plotly-5.18.0-pyhd8ed1ab_0.conda#9f6a8664f1fe752f79473eeb9bf33a60 +https://conda.anaconda.org/conda-forge/noarch/plotly-5.19.0-pyhd8ed1ab_0.conda#669cd7065794633b9e64e6a9612ec700 https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_5.conda#ac902ff3c1c6d750dd0dfc93a974ab74 -https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.0-pyhd8ed1ab_0.conda#5ba1cc5b924226349d4a49fb547b7579 
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.2-pyhd8ed1ab_0.conda#40bd3ef942b9642a3eb20b0bbf92469b https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.0-pyhd8ed1ab_0.conda#6a7e0694921f668a030d52f0c47baebd +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 https://conda.anaconda.org/conda-forge/linux-64/compilers-1.7.0-ha770c72_0.conda#81458b3aed8ab8711951ec3c0c04e097 https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.9-h98fc4e7_0.conda#bcc7157b06fce7f5e055402a8135dfd8 https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.3.0-h3d44ed6_0.conda#5a6f6c00ef982a9bc83558d9ac8f64a0 @@ -223,23 +223,23 @@ https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-21_linux64_open https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.0-py39h7633fee_0.conda#ed71ad3e30eb03da363fb797419cce98 https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.9-h8e1006c_0.conda#614b81f8ed66c56b640faee7076ad14a https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2024.1.1-py39hf9b8f0e_0.conda#9ddd29852457d1152ca235eb87bc74fb -https://conda.anaconda.org/conda-forge/noarch/imageio-2.33.1-pyh8c1a49c_0.conda#1c34d58ac469a34e7e96832861368bce -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.0-py39hddac248_0.conda#95aaa7baa61432a1ce85dedb7b86d2dd +https://conda.anaconda.org/conda-forge/noarch/imageio-2.34.0-pyh4b66e23_0.conda#b8853659d596f967c661f544dd89ede7 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.1-py39hddac248_0.conda#85293a042c24a08e71b7608ee66b6134 https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.6-pyhd8ed1ab_0.conda#a5b55d1cb110cdcedc748b5c3e16e687 -https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.7-py39h927a070_0.conda#24a2968bb1f6630daa0da4368aeeeb64 -https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.0-pyhd8ed1ab_0.conda#134b2b57b7865d2316a7cce1915a51ed +https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.10-py39h927a070_0.conda#2c626921a52a9571bda297ef0fceb15a +https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.1-pyhd8ed1ab_0.conda#d15917f33140f8d2ac9ca44db7ec8a25 https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.4.1-py39h44dd56e_1.conda#d037c20e3da2e85f03ebd20ad480c359 https://conda.anaconda.org/conda-forge/linux-64/scipy-1.12.0-py39h474f0d3_2.conda#6ab241b2023730f6b41712dc1b503afa https://conda.anaconda.org/conda-forge/linux-64/blas-2.121-openblas.conda#4a279792fd8861a15705516a52872eb6 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.2-py39he9076e7_0.conda#6085411aa2f0b2b801d3b46e1d3b83c5 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.3-py39he9076e7_0.conda#5456bdfe5809ebf5689eda6c808b686e https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.1-py39hda80f44_1.conda#6df47699edb4d8d3365de2d189a456bc -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h450f30e_18.conda#ef0430f8df5dcdedcaaab340b228f30c +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h5810be5_19.conda#54866f708d43002a514d0b9b0f84bc11 https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.14.1-py39h44dd56e_0.conda#dc565186b972bd87e49b9c35390ddd8c 
-https://conda.anaconda.org/conda-forge/noarch/tifffile-2024.1.30-pyhd8ed1ab_0.conda#9ae618ad19f5b39955c9f2e43b8d03c3 +https://conda.anaconda.org/conda-forge/noarch/tifffile-2024.2.12-pyhd8ed1ab_0.conda#d5c8bef52be4e70c48b1400eec3eecc8 https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1 https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.22.0-py39hddac248_2.conda#8d502a4d2cbe5a45ff35ca8af8cbec0a https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.13.2-pyhd8ed1ab_0.conda#0918a9201e824211cdf444dbf8d55752 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.2-py39hf3d152e_0.conda#18d40a5ada9a801cabaf5d47c15c6282 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.3-py39hf3d152e_0.conda#983f5b77540eb5aa00238e72ec9b1dfb https://conda.anaconda.org/conda-forge/noarch/seaborn-0.13.2-hd8ed1ab_0.conda#fd31ebf5867914de597f9961c478e482 https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.6.0-pyhd8ed1ab_0.conda#191b8a622191a403700d16a2008e4e29 https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 @@ -257,22 +257,22 @@ https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1 # pip defusedxml @ https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl#sha256=a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61 # pip fastjsonschema @ https://files.pythonhosted.org/packages/9c/b9/79691036d4a8f9857e74d1728b23f34f583b81350a27492edda58d5604e1/fastjsonschema-2.19.1-py3-none-any.whl#sha256=3672b47bc94178c9f23dbb654bf47440155d4db9df5f7bc47643315f9c405cd0 # pip fqdn @ https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl#sha256=3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014 -# pip json5 @ https://files.pythonhosted.org/packages/70/ba/fa37123a86ae8287d6678535a944f9c3377d8165e536310ed6f6cb0f0c0e/json5-0.9.14-py2.py3-none-any.whl#sha256=740c7f1b9e584a468dbb2939d8d458db3427f2c93ae2139d05f47e453eae964f +# pip json5 @ https://files.pythonhosted.org/packages/7c/c3/da3b0c409453ae2d39bcfd04007249fa2f50005d365609a7497a4bbb81f1/json5-0.9.17-py2.py3-none-any.whl#sha256=f8ec1ecf985951d70f780f6f877c4baca6a47b6e61e02c4cd190138d10a7805a # pip jsonpointer @ https://files.pythonhosted.org/packages/12/f6/0232cc0c617e195f06f810534d00b74d2f348fe71b2118009ad8ad31f878/jsonpointer-2.4-py2.py3-none-any.whl#sha256=15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a # pip jupyterlab-pygments @ https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl#sha256=841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780 # pip mistune @ https://files.pythonhosted.org/packages/f0/74/c95adcdf032956d9ef6c89a9b8a5152bf73915f8c633f3e3d88d06bd699c/mistune-3.0.2-py3-none-any.whl#sha256=71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205 # pip overrides @ https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl#sha256=c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49 # pip pandocfilters @ 
https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl#sha256=93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc # pip pkginfo @ https://files.pythonhosted.org/packages/b3/f2/6e95c86a23a30fa205ea6303a524b20cbae27fbee69216377e3d95266406/pkginfo-1.9.6-py3-none-any.whl#sha256=4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546 -# pip prometheus-client @ https://files.pythonhosted.org/packages/bb/9f/ad934418c48d01269fc2af02229ff64bcf793fd5d7f8f82dc5e7ea7ef149/prometheus_client-0.19.0-py3-none-any.whl#sha256=c88b1e6ecf6b41cd8fb5731c7ae919bf66df6ec6fafa555cd6c0e16ca169ae92 +# pip prometheus-client @ https://files.pythonhosted.org/packages/c7/98/745b810d822103adca2df8decd4c0bbe839ba7ad3511af3f0d09692fc0f0/prometheus_client-0.20.0-py3-none-any.whl#sha256=cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7 # pip ptyprocess @ https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl#sha256=4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 # pip pycparser @ https://files.pythonhosted.org/packages/62/d5/5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53/pycparser-2.21-py2.py3-none-any.whl#sha256=8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 # pip python-json-logger @ https://files.pythonhosted.org/packages/35/a6/145655273568ee78a581e734cf35beb9e33a370b29c5d3c8fee3744de29f/python_json_logger-2.0.7-py3-none-any.whl#sha256=f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd # pip pyyaml @ https://files.pythonhosted.org/packages/7d/39/472f2554a0f1e825bd7c5afc11c817cd7a2f3657460f7159f691fbb37c51/PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c # pip rfc3986-validator @ https://files.pythonhosted.org/packages/9e/51/17023c0f8f1869d8806b979a2bffa3f861f26a3f1a66b094288323fba52f/rfc3986_validator-0.1.1-py2.py3-none-any.whl#sha256=2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9 -# pip rpds-py @ https://files.pythonhosted.org/packages/c2/e9/190521d63b504c12bdcffb27ea6aaac1dbb2521be983c3a2a0ab4a938b8c/rpds_py-0.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=dfe07308b311a8293a0d5ef4e61411c5c20f682db6b5e73de6c7c8824272c256 +# pip rpds-py @ https://files.pythonhosted.org/packages/fd/ea/92231b62681961812e9fbd8ef9be7137856784406bf6a384976bb7b46472/rpds_py-0.18.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=ddc2f4dfd396c7bfa18e6ce371cba60e4cf9d2e5cdb71376aa2da264605b60b9 # pip send2trash @ https://files.pythonhosted.org/packages/a9/78/e4df1e080ed790acf3a704edf521006dd96b9841bd2e2a462c0d255e0565/Send2Trash-1.8.2-py3-none-any.whl#sha256=a384719d99c07ce1eefd6905d2decb6f8b7ed054025bb0e618919f945de4f679 -# pip sniffio @ https://files.pythonhosted.org/packages/c3/a0/5dba8ed157b0136607c7f2151db695885606968d1fae123dc3391e0cfdbf/sniffio-1.3.0-py3-none-any.whl#sha256=eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384 +# pip sniffio @ https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl#sha256=2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 # pip soupsieve @ 
https://files.pythonhosted.org/packages/4c/f3/038b302fdfbe3be7da016777069f26ceefe11a681055ea1f7817546508e3/soupsieve-2.5-py3-none-any.whl#sha256=eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7 # pip traitlets @ https://files.pythonhosted.org/packages/45/34/5dc77fdc7bb4bd198317eea5679edf9cc0a186438b5b19dbb9062fb0f4d5/traitlets-5.14.1-py3-none-any.whl#sha256=2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74 # pip types-python-dateutil @ https://files.pythonhosted.org/packages/28/50/8ed67814241e2684369f4b8b881c7d31a0816e76c8690ea8518017a35b7e/types_python_dateutil-2.8.19.20240106-py3-none-any.whl#sha256=efbbdc54590d0f16152fa103c9879c7d4a00e82078f6e2cf01769042165acaa2 @@ -280,7 +280,7 @@ https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1 # pip webcolors @ https://files.pythonhosted.org/packages/d5/e1/3e9013159b4cbb71df9bd7611cbf90dc2c621c8aeeb677fc41dad72f2261/webcolors-1.13-py3-none-any.whl#sha256=29bc7e8752c0a1bd4a1f03c14d6e6a72e93d82193738fa860cbff59d0fcc11bf # pip webencodings @ https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl#sha256=a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 # pip websocket-client @ https://files.pythonhosted.org/packages/1e/70/1e88138a9afbed1d37093b85f0bebc3011623c4f47c166431599fe9d6c93/websocket_client-1.7.0-py3-none-any.whl#sha256=f4c3d22fec12a2461427a29957ff07d35098ee2d976d3ba244e688b8b4057588 -# pip anyio @ https://files.pythonhosted.org/packages/bf/cd/d6d9bb1dadf73e7af02d18225cbd2c93f8552e13130484f1c8dcfece292b/anyio-4.2.0-py3-none-any.whl#sha256=745843b39e829e108e518c489b31dc757de7d2131d53fac32bd8df268227bfee +# pip anyio @ https://files.pythonhosted.org/packages/14/fd/2f20c40b45e4fb4324834aea24bd4afdf1143390242c0b33774da0e2e34f/anyio-4.3.0-py3-none-any.whl#sha256=048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8 # pip arrow @ https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl#sha256=c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80 # pip beautifulsoup4 @ https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl#sha256=b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed # pip bleach @ https://files.pythonhosted.org/packages/ea/63/da7237f805089ecc28a3f36bca6a21c31fcbc2eb380f3b8f1be3312abd14/bleach-6.1.0-py3-none-any.whl#sha256=3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6 @@ -300,11 +300,11 @@ https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.9.1-pyhd8ed1 # pip argon2-cffi @ https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl#sha256=c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea # pip jsonschema @ https://files.pythonhosted.org/packages/39/9d/b035d024c62c85f2e2d4806a59ca7b8520307f34e0932fbc8cc75fe7b2d9/jsonschema-4.21.1-py3-none-any.whl#sha256=7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f # pip jupyter-client @ https://files.pythonhosted.org/packages/43/ae/5f4f72980765e2e5e02b260f9c53bcc706cefa7ac9c8d7240225c55788d4/jupyter_client-8.6.0-py3-none-any.whl#sha256=909c474dbe62582ae62b758bca86d6518c85234bdee2d908c778db6d72f39d99 -# pip jupyterlite-pyodide-kernel @ 
https://files.pythonhosted.org/packages/08/19/2ef7099e28a9e411e1eb901edb089e43c0321128651c35c6051baba36577/jupyterlite_pyodide_kernel-0.2.2-py3-none-any.whl#sha256=d452e5a4fc5af1cf84073b339b0033e9d4726c9978fe414036604ddecf39ed10 +# pip jupyterlite-pyodide-kernel @ https://files.pythonhosted.org/packages/41/3f/b1e5d76beaddd94f47cc40b8430cca1e178c3acc53cce8556156991845ac/jupyterlite_pyodide_kernel-0.2.3-py3-none-any.whl#sha256=32b30d0f5ea5b87470cd36f824589e705c9bdaa8f7072d534aea04ec2c7993dc # pip jupyter-events @ https://files.pythonhosted.org/packages/e3/55/0c1aa72f4317e826a471dc4adc3036acd11d496ded68c4bbac2a88551519/jupyter_events-0.9.0-py3-none-any.whl#sha256=d853b3c10273ff9bc8bb8b30076d65e2c9685579db736873de6c2232dde148bf # pip nbformat @ https://files.pythonhosted.org/packages/f4/e7/ef30a90b70eba39e675689b9eaaa92530a71d7435ab8f9cae520814e0caf/nbformat-5.9.2-py3-none-any.whl#sha256=1c5172d786a41b82bcfd0c23f9e6b6f072e8fb49c39250219e4acfff1efe89e9 # pip nbclient @ https://files.pythonhosted.org/packages/6b/3a/607149974149f847125c38a62b9ea2b8267eb74823bbf8d8c54ae0212a00/nbclient-0.9.0-py3-none-any.whl#sha256=a3a1ddfb34d4a9d17fc744d655962714a866639acd30130e9be84191cd97cd15 -# pip nbconvert @ https://files.pythonhosted.org/packages/c9/ec/c120b21e7f884a701e12a241992754e719adaf430d0d6b30c6655776bc35/nbconvert-7.16.0-py3-none-any.whl#sha256=ad3dc865ea6e2768d31b7eb6c7ab3be014927216a5ece3ef276748dd809054c7 +# pip nbconvert @ https://files.pythonhosted.org/packages/dc/6f/2c4e3dafb36dff2c98a170c1d61275f2e2d6bfd0f07d25771c1c18a6a529/nbconvert-7.16.1-py3-none-any.whl#sha256=3188727dffadfdc9c6a1c7250729063d7bc78b355ad7aa023138afa030d1cd07 # pip jupyter-server @ https://files.pythonhosted.org/packages/25/d6/6ee093c967d11144aeb1b0b4952d30e51da8eb2737837ab612084c783a58/jupyter_server-2.12.5-py3-none-any.whl#sha256=184a0f82809a8522777cfb6b760ab6f4b1bb398664c5860a27cec696cb884923 -# pip jupyterlab-server @ https://files.pythonhosted.org/packages/a2/97/abbbe35fc67b6f9423309988f2e411f7cb117b08321866d3d8b720f4c0d4/jupyterlab_server-2.25.2-py3-none-any.whl#sha256=5b1798c9cc6a44f65c757de9f97fc06fc3d42535afbf47d2ace5e964ab447aaf +# pip jupyterlab-server @ https://files.pythonhosted.org/packages/ab/ac/a19c579bb8ab2a2aefcf47cd3787683e6e136378d7ab2602be3b8e628030/jupyterlab_server-2.25.3-py3-none-any.whl#sha256=c48862519fded9b418c71645d85a49b2f0ec50d032ba8316738e9276046088c1 # pip jupyterlite-sphinx @ https://files.pythonhosted.org/packages/9c/bd/1695eebeb376315c9fc5cbd41c54fb84bb69c68e69651bfc6f03aa4fe659/jupyterlite_sphinx-0.11.0-py3-none-any.whl#sha256=2a0762167e89ec6acd267c73bb90b528728fdba5e30390ea4fe37ddcec277191 diff --git a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock index 55ffd9f203b5c..e8b32dea9ca63 100644 --- a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock +++ b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock @@ -8,7 +8,7 @@ https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_1.conda#6185f640c43843e5ad6fd1c5372c3f80 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_16.conda#7ca122655873935e02c91279c5b03c8c 
+https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_17.conda#d731b543793afc0433c4fd593e693fce https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 https://conda.anaconda.org/conda-forge/noarch/libgcc-devel_linux-64-12.3.0-h8bca6fd_105.conda#e12ce6b051085b8f27e239f5e5f5bce5 https://conda.anaconda.org/conda-forge/noarch/libstdcxx-devel_linux-64-12.3.0-h8bca6fd_105.conda#b3c6062c84a8e172555ee104ea6a01ab @@ -18,7 +18,7 @@ https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-4_cp39.conda#bfe4 https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_5.conda#d211c42b9ce49aee3734fdc828731689 -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_16.conda#071ea8dceff4d30ac511f4a2f8437cd1 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_17.conda#595db67e32b276298ff3d94d07d47fbf https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-hf600244_0.conda#33084421a8c0af6aef1b439707f7662a https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-hdd6e379_0.conda#ccc940fddbc3fcd3d79cd4c654c4b5c4 @@ -72,8 +72,8 @@ https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2. https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_5.conda#e73e9cfd1191783392131e6238bdb3e9 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a -https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.42-h2797004_0.conda#d67729828dc6ff7ba44a61062ad79880 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.48-h71f35ed_0.conda#4d18d86916705d352d5f4adfb7f0edd3 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.45.1-h2797004_0.conda#fc4ccadfbf6d4784de88c41704792562 https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c @@ -92,14 +92,14 @@ https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-hf https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-he2b93b0_5.conda#cddba8fd94e52012abea1caad722b9c2 https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.3-hd590300_0.conda#32d16ad533c59bb0a3c5ffaf16110829 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.3-h783c2da_0.conda#9bd06b12bbfa6fd1740fd23af4b0f0c7 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.4-h783c2da_0.conda#d86baf8740d1a906b9716f2a0bac2f2d 
https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.9.3-default_h554bfaf_1009.conda#f36ddc11ca46958197a45effdd286e45 https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-ha9c0a0a_2.conda#55ed21669b2015f77c180feb1dd41930 https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-17.0.6-h4dfa4b3_0.conda#c1665f9c1c9f6c93d8b4e492a6a39056 https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_6.conda#e87530d1b12dd7f4e0f856dc07358d60 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.97-h1d7d5a4_0.conda#b916d71a3032416e3f9136090d814472 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.98-h1d7d5a4_0.conda#54b56c2fdf973656b748e0378900ec13 https://conda.anaconda.org/conda-forge/linux-64/python-3.9.18-h0755675_1_cpython.conda#255a7002aeec7a067ff19b545aca6328 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 @@ -124,7 +124,7 @@ https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.con https://conda.anaconda.org/conda-forge/noarch/fsspec-2024.2.0-pyhca7485f_0.conda#fad86b90138cf5d82c6f5a2ed6e683d9 https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h499e0f7_2.conda#0558a8c44eb7a18e6682bd3a8ae6dcab https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h7fe76b4_2.conda#3a749210487c0358b6f135a648cbbf60 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.3-hfc55251_0.conda#41d2f46e0ac8372eeb959860713d9b21 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.4-hfc55251_0.conda#d184ba1bf15a2bbb3be6118c90fd487d https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h8d2909c_2.conda#673bac341be6b90ef9e8abae7e52ca46 https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-h8a814eb_2.conda#f517b1525e9783849bd56a5dc45a9960 https://conda.anaconda.org/conda-forge/noarch/idna-3.6-pyhd8ed1ab_0.conda#1a76f09108576397c41c0b0c5bd84134 @@ -156,12 +156,12 @@ https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.11.0-h00ab1b0_1.conda#4531d2927578e7e254ff3bcf6457518c https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.3-pyhd8ed1ab_0.conda#1482e77f87c6a702a7e05ef22c9b197b -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.3.0-pyhc1e730c_0.conda#698d2d2b621640bddb9191f132967c9f https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.1-pyhd8ed1ab_0.conda#2fcb582444635e2c402e8569bb94e039 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py39hd1e30aa_1.conda#cbe186eefb0bcd91e8f47c3908489874 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.9.0-pyha770c72_0.conda#a92a6440c3fe7052d63244f3aba2a4a7 
+https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4-py39hd1e30aa_0.conda#1e865e9188204cdfb1fd2531780add88 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.10.0-pyha770c72_0.conda#16ae769069b380646c47142d719ef466 https://conda.anaconda.org/conda-forge/noarch/wheel-0.42.0-pyhd8ed1ab_0.conda#1cdea58981c5cbc17b51973bcaddcea7 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.41-hd590300_0.conda#81f740407b45e3f9047b3174fa94eb9e @@ -173,7 +173,7 @@ https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-h3faef2a_0.conda#f9 https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.7.0-h00ab1b0_0.conda#b4537c98cb59f8725b0e1e65816b4a28 https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.3-py39hd1e30aa_0.conda#dc0fb8e157c7caba4c98f1e1f9d2e5f4 https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.7.0-heb67821_0.conda#7ef7c0f111dad1c8006504a0f1ccd820 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.3-hfc55251_0.conda#e08e51acc7d1ae8dbe13255e7b4c64ac +https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.4-hfc55251_0.conda#f36a7b2420c3fc3c48a3d609841d8fee https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-7.0.1-pyha770c72_0.conda#746623a787e06191d80a2133e5daff17 https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.3-pyhd8ed1ab_0.conda#e7d8df6509ba635247ff9aea31134262 https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc @@ -186,10 +186,10 @@ https://conda.anaconda.org/conda-forge/linux-64/pillow-10.2.0-py39had0adad_0.con https://conda.anaconda.org/conda-forge/noarch/pip-24.0-pyhd8ed1ab_0.conda#f586ac1e56c8638b64f9c8122a7b8a67 https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.0-pyhd8ed1ab_0.conda#6a7bcc42ef58dd6cf3da9333ea102433 https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_5.conda#ac902ff3c1c6d750dd0dfc93a974ab74 -https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.0-pyhd8ed1ab_0.conda#5ba1cc5b924226349d4a49fb547b7579 +https://conda.anaconda.org/conda-forge/noarch/pytest-8.0.2-pyhd8ed1ab_0.conda#40bd3ef942b9642a3eb20b0bbf92469b https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.0-pyhd8ed1ab_0.conda#6a7e0694921f668a030d52f0c47baebd +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.1-pyhd8ed1ab_0.conda#08807a87fa7af10754d46f63b368e016 https://conda.anaconda.org/conda-forge/linux-64/compilers-1.7.0-ha770c72_0.conda#81458b3aed8ab8711951ec3c0c04e097 https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.9-h98fc4e7_0.conda#bcc7157b06fce7f5e055402a8135dfd8 https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.3.0-h3d44ed6_0.conda#5a6f6c00ef982a9bc83558d9ac8f64a0 @@ -203,13 +203,13 @@ https://conda.anaconda.org/conda-forge/noarch/dask-core-2024.2.0-pyhd8ed1ab_0.co https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.9-h8e1006c_0.conda#614b81f8ed66c56b640faee7076ad14a https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-21_linux64_mkl.conda#0553cad80ef02be86c8e178eeecb6a34 https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-21_linux64_mkl.conda#52837ab7fd5b43d3960c62e5c91958d6 
-https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.0-pyhd8ed1ab_0.conda#134b2b57b7865d2316a7cce1915a51ed +https://conda.anaconda.org/conda-forge/noarch/pooch-1.8.1-pyhd8ed1ab_0.conda#d15917f33140f8d2ac9ca44db7ec8a25 https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-21_linux64_mkl.conda#0d45f03de7143f324b37454af46feb26 https://conda.anaconda.org/conda-forge/linux-64/numpy-1.19.5-py39hd249d9e_3.tar.bz2#0cf333996ebdeeba8d1c8c1c0ee9eff9 -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h450f30e_18.conda#ef0430f8df5dcdedcaaab340b228f30c +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h5810be5_19.conda#54866f708d43002a514d0b9b0f84bc11 https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-21_linux64_mkl.conda#67684e493802a70fd14fcf4b8872ae4d https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-lite-2019.12.3-py39hd257fcd_5.tar.bz2#32dba66d6abc2b4b5b019c9e54307312 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.33.1-pyh8c1a49c_0.conda#1c34d58ac469a34e7e96832861368bce +https://conda.anaconda.org/conda-forge/noarch/imageio-2.34.0-pyh4b66e23_0.conda#b8853659d596f967c661f544dd89ede7 https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.3.4-py39h2fa2bec_0.tar.bz2#9ec0b2186fab9121c54f4844f93ee5b7 https://conda.anaconda.org/conda-forge/linux-64/pandas-1.1.5-py39hde0f152_0.tar.bz2#79fc4b5b3a865b90dd3701cecf1ad33c https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.6-pyhd8ed1ab_0.conda#a5b55d1cb110cdcedc748b5c3e16e687 From d74a5a5c4c427c81292b15b92df8138e70fd94b9 Mon Sep 17 00:00:00 2001 From: Thanh Lam DANG <70220760+lamdang2k@users.noreply.github.com> Date: Mon, 26 Feb 2024 17:40:58 +0100 Subject: [PATCH 10/23] DOC Add a docstring example for sklearn.datasets.make_checkerboard (#28427) --- sklearn/datasets/_samples_generator.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index 1d1e65ff9966e..bf028443bdcae 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -2228,6 +2228,20 @@ def make_checkerboard( .. [1] Kluger, Y., Basri, R., Chang, J. T., & Gerstein, M. (2003). Spectral biclustering of microarray data: coclustering genes and conditions. Genome research, 13(4), 703-716. + + Examples + -------- + >>> from sklearn.datasets import make_checkerboard + >>> data, rows, columns = make_checkerboard(shape=(300, 300), n_clusters=10, + ... 
random_state=42) + >>> data.shape + (300, 300) + >>> rows.shape + (100, 300) + >>> columns.shape + (100, 300) + >>> print(rows[0][:5], columns[0][:5]) + [False False False True False] [False False False False False] """ generator = check_random_state(random_state) From d16f0495feb7b781015262b1ef407ddc0e737136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= Date: Mon, 26 Feb 2024 23:18:47 +0100 Subject: [PATCH 11/23] MAINT cleanup utils.__init__: move chunking tools into dedicated submodule (#28516) --- sklearn/cluster/_optics.py | 3 +- sklearn/datasets/_arff_parser.py | 4 +- sklearn/ensemble/_iforest.py | 2 +- sklearn/metrics/pairwise.py | 2 +- sklearn/utils/__init__.py | 173 +------------------------- sklearn/utils/_chunking.py | 175 +++++++++++++++++++++++++++ sklearn/utils/tests/test_chunking.py | 73 +++++++++++ sklearn/utils/tests/test_utils.py | 69 ----------- 8 files changed, 258 insertions(+), 243 deletions(-) create mode 100644 sklearn/utils/_chunking.py create mode 100644 sklearn/utils/tests/test_chunking.py diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index 230e7ae2129df..b2a0c4d642a00 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -21,7 +21,8 @@ from ..metrics import pairwise_distances from ..metrics.pairwise import _VALID_METRICS, PAIRWISE_BOOLEAN_FUNCTIONS from ..neighbors import NearestNeighbors -from ..utils import gen_batches, get_chunk_n_rows +from ..utils import gen_batches +from ..utils._chunking import get_chunk_n_rows from ..utils._param_validation import ( HasMethods, Interval, diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py index 73613f835ad19..5c427441012d6 100644 --- a/sklearn/datasets/_arff_parser.py +++ b/sklearn/datasets/_arff_parser.py @@ -10,7 +10,7 @@ from ..externals import _arff from ..externals._arff import ArffSparseDataType -from ..utils import _chunk_generator, get_chunk_n_rows +from ..utils._chunking import chunk_generator, get_chunk_n_rows from ..utils._optional_dependencies import check_pandas_support from ..utils.fixes import pd_fillna @@ -192,7 +192,7 @@ def _io_to_generator(gzip_file): # read arff data with chunks columns_to_keep = [col for col in columns_names if col in columns_to_select] dfs = [first_df[columns_to_keep]] - for data in _chunk_generator(arff_container["data"], chunksize): + for data in chunk_generator(arff_container["data"], chunksize): dfs.append( pd.DataFrame(data, columns=columns_names, copy=False)[columns_to_keep] ) diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py index 870bd9cfd9b6c..480d1f2d3e4ef 100644 --- a/sklearn/ensemble/_iforest.py +++ b/sklearn/ensemble/_iforest.py @@ -16,8 +16,8 @@ check_array, check_random_state, gen_batches, - get_chunk_n_rows, ) +from ..utils._chunking import get_chunk_n_rows from ..utils._param_validation import Interval, RealNotInt, StrOptions from ..utils.validation import _num_samples, check_is_fitted from ._bagging import BaseBagging diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 4da6529436b94..7b09bfd70a2fd 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -24,8 +24,8 @@ check_array, gen_batches, gen_even_slices, - get_chunk_n_rows, ) +from ..utils._chunking import get_chunk_n_rows from ..utils._mask import _get_mask from ..utils._missing import is_scalar_nan from ..utils._param_validation import ( diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 
a67166cadb9fc..9a2481393271a 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -14,12 +14,12 @@ import numpy as np from scipy.sparse import issparse -from .. import get_config from ..exceptions import DataConversionWarning from . import _joblib, metadata_routing from ._bunch import Bunch +from ._chunking import gen_batches, gen_even_slices from ._estimator_html_repr import estimator_html_repr -from ._param_validation import Integral, Interval, validate_params +from ._param_validation import Interval, validate_params from .class_weight import compute_class_weight, compute_sample_weight from .deprecation import deprecated from .discovery import all_estimators @@ -76,6 +76,8 @@ "Bunch", "metadata_routing", "safe_sqr", + "gen_batches", + "gen_even_slices", ] IS_PYPY = platform.python_implementation() == "PyPy" @@ -745,132 +747,6 @@ def shuffle(*arrays, random_state=None, n_samples=None): ) -def _chunk_generator(gen, chunksize): - """Chunk generator, ``gen`` into lists of length ``chunksize``. The last - chunk may have a length less than ``chunksize``.""" - while True: - chunk = list(islice(gen, chunksize)) - if chunk: - yield chunk - else: - return - - -@validate_params( - { - "n": [Interval(numbers.Integral, 1, None, closed="left")], - "batch_size": [Interval(numbers.Integral, 1, None, closed="left")], - "min_batch_size": [Interval(numbers.Integral, 0, None, closed="left")], - }, - prefer_skip_nested_validation=True, -) -def gen_batches(n, batch_size, *, min_batch_size=0): - """Generator to create slices containing `batch_size` elements from 0 to `n`. - - The last slice may contain less than `batch_size` elements, when - `batch_size` does not divide `n`. - - Parameters - ---------- - n : int - Size of the sequence. - batch_size : int - Number of elements in each batch. - min_batch_size : int, default=0 - Minimum number of elements in each batch. - - Yields - ------ - slice of `batch_size` elements - - See Also - -------- - gen_even_slices: Generator to create n_packs slices going up to n. - - Examples - -------- - >>> from sklearn.utils import gen_batches - >>> list(gen_batches(7, 3)) - [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)] - >>> list(gen_batches(6, 3)) - [slice(0, 3, None), slice(3, 6, None)] - >>> list(gen_batches(2, 3)) - [slice(0, 2, None)] - >>> list(gen_batches(7, 3, min_batch_size=0)) - [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)] - >>> list(gen_batches(7, 3, min_batch_size=2)) - [slice(0, 3, None), slice(3, 7, None)] - """ - start = 0 - for _ in range(int(n // batch_size)): - end = start + batch_size - if end + min_batch_size > n: - continue - yield slice(start, end) - start = end - if start < n: - yield slice(start, n) - - -@validate_params( - { - "n": [Interval(Integral, 1, None, closed="left")], - "n_packs": [Interval(Integral, 1, None, closed="left")], - "n_samples": [Interval(Integral, 1, None, closed="left"), None], - }, - prefer_skip_nested_validation=True, -) -def gen_even_slices(n, n_packs, *, n_samples=None): - """Generator to create `n_packs` evenly spaced slices going up to `n`. - - If `n_packs` does not divide `n`, except for the first `n % n_packs` - slices, remaining slices may contain fewer elements. - - Parameters - ---------- - n : int - Size of the sequence. - n_packs : int - Number of slices to generate. - n_samples : int, default=None - Number of samples. Pass `n_samples` when the slices are to be used for - sparse matrix indexing; slicing off-the-end raises an exception, while - it works for NumPy arrays. 
- - Yields - ------ - `slice` representing a set of indices from 0 to n. - - See Also - -------- - gen_batches: Generator to create slices containing batch_size elements - from 0 to n. - - Examples - -------- - >>> from sklearn.utils import gen_even_slices - >>> list(gen_even_slices(10, 1)) - [slice(0, 10, None)] - >>> list(gen_even_slices(10, 10)) - [slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)] - >>> list(gen_even_slices(10, 5)) - [slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)] - >>> list(gen_even_slices(10, 3)) - [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)] - """ - start = 0 - for pack_num in range(n_packs): - this_n = n // n_packs - if pack_num < n % n_packs: - this_n += 1 - if this_n > 0: - end = start + this_n - if n_samples is not None: - end = min(n_samples, end) - yield slice(start, end, None) - start = end - - def tosequence(x): """Cast iterable x to a Sequence, avoiding a copy if possible. @@ -1012,44 +888,3 @@ def _print_elapsed_time(source, message=None): start = timeit.default_timer() yield print(_message_with_time(source, message, timeit.default_timer() - start)) - - -def get_chunk_n_rows(row_bytes, *, max_n_rows=None, working_memory=None): - """Calculate how many rows can be processed within `working_memory`. - - Parameters - ---------- - row_bytes : int - The expected number of bytes of memory that will be consumed - during the processing of each row. - max_n_rows : int, default=None - The maximum return value. - working_memory : int or float, default=None - The number of rows to fit inside this number of MiB will be - returned. When None (default), the value of - ``sklearn.get_config()['working_memory']`` is used. - - Returns - ------- - int - The number of rows which can be processed within `working_memory`. - - Warns - ----- - Issues a UserWarning if `row_bytes exceeds `working_memory` MiB. - """ - - if working_memory is None: - working_memory = get_config()["working_memory"] - - chunk_n_rows = int(working_memory * (2**20) // row_bytes) - if max_n_rows is not None: - chunk_n_rows = min(chunk_n_rows, max_n_rows) - if chunk_n_rows < 1: - warnings.warn( - "Could not adhere to working_memory config. " - "Currently %.0fMiB, %.0fMiB required." - % (working_memory, np.ceil(row_bytes * 2**-20)) - ) - chunk_n_rows = 1 - return chunk_n_rows diff --git a/sklearn/utils/_chunking.py b/sklearn/utils/_chunking.py new file mode 100644 index 0000000000000..7bf53d0626c85 --- /dev/null +++ b/sklearn/utils/_chunking.py @@ -0,0 +1,175 @@ +import warnings +from itertools import islice +from numbers import Integral + +import numpy as np + +from .._config import get_config +from ._param_validation import Interval, validate_params + + +def chunk_generator(gen, chunksize): + """Chunk generator, ``gen`` into lists of length ``chunksize``. The last + chunk may have a length less than ``chunksize``.""" + while True: + chunk = list(islice(gen, chunksize)) + if chunk: + yield chunk + else: + return + + +@validate_params( + { + "n": [Interval(Integral, 1, None, closed="left")], + "batch_size": [Interval(Integral, 1, None, closed="left")], + "min_batch_size": [Interval(Integral, 0, None, closed="left")], + }, + prefer_skip_nested_validation=True, +) +def gen_batches(n, batch_size, *, min_batch_size=0): + """Generator to create slices containing `batch_size` elements from 0 to `n`. + + The last slice may contain less than `batch_size` elements, when + `batch_size` does not divide `n`. + + Parameters + ---------- + n : int + Size of the sequence. 
+ batch_size : int + Number of elements in each batch. + min_batch_size : int, default=0 + Minimum number of elements in each batch. + + Yields + ------ + slice of `batch_size` elements + + See Also + -------- + gen_even_slices: Generator to create n_packs slices going up to n. + + Examples + -------- + >>> from sklearn.utils import gen_batches + >>> list(gen_batches(7, 3)) + [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)] + >>> list(gen_batches(6, 3)) + [slice(0, 3, None), slice(3, 6, None)] + >>> list(gen_batches(2, 3)) + [slice(0, 2, None)] + >>> list(gen_batches(7, 3, min_batch_size=0)) + [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)] + >>> list(gen_batches(7, 3, min_batch_size=2)) + [slice(0, 3, None), slice(3, 7, None)] + """ + start = 0 + for _ in range(int(n // batch_size)): + end = start + batch_size + if end + min_batch_size > n: + continue + yield slice(start, end) + start = end + if start < n: + yield slice(start, n) + + +@validate_params( + { + "n": [Interval(Integral, 1, None, closed="left")], + "n_packs": [Interval(Integral, 1, None, closed="left")], + "n_samples": [Interval(Integral, 1, None, closed="left"), None], + }, + prefer_skip_nested_validation=True, +) +def gen_even_slices(n, n_packs, *, n_samples=None): + """Generator to create `n_packs` evenly spaced slices going up to `n`. + + If `n_packs` does not divide `n`, except for the first `n % n_packs` + slices, remaining slices may contain fewer elements. + + Parameters + ---------- + n : int + Size of the sequence. + n_packs : int + Number of slices to generate. + n_samples : int, default=None + Number of samples. Pass `n_samples` when the slices are to be used for + sparse matrix indexing; slicing off-the-end raises an exception, while + it works for NumPy arrays. + + Yields + ------ + `slice` representing a set of indices from 0 to n. + + See Also + -------- + gen_batches: Generator to create slices containing batch_size elements + from 0 to n. + + Examples + -------- + >>> from sklearn.utils import gen_even_slices + >>> list(gen_even_slices(10, 1)) + [slice(0, 10, None)] + >>> list(gen_even_slices(10, 10)) + [slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)] + >>> list(gen_even_slices(10, 5)) + [slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)] + >>> list(gen_even_slices(10, 3)) + [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)] + """ + start = 0 + for pack_num in range(n_packs): + this_n = n // n_packs + if pack_num < n % n_packs: + this_n += 1 + if this_n > 0: + end = start + this_n + if n_samples is not None: + end = min(n_samples, end) + yield slice(start, end, None) + start = end + + +def get_chunk_n_rows(row_bytes, *, max_n_rows=None, working_memory=None): + """Calculate how many rows can be processed within `working_memory`. + + Parameters + ---------- + row_bytes : int + The expected number of bytes of memory that will be consumed + during the processing of each row. + max_n_rows : int, default=None + The maximum return value. + working_memory : int or float, default=None + The number of rows to fit inside this number of MiB will be + returned. When None (default), the value of + ``sklearn.get_config()['working_memory']`` is used. + + Returns + ------- + int + The number of rows which can be processed within `working_memory`. + + Warns + ----- + Issues a UserWarning if `row_bytes exceeds `working_memory` MiB. 
+ """ + + if working_memory is None: + working_memory = get_config()["working_memory"] + + chunk_n_rows = int(working_memory * (2**20) // row_bytes) + if max_n_rows is not None: + chunk_n_rows = min(chunk_n_rows, max_n_rows) + if chunk_n_rows < 1: + warnings.warn( + "Could not adhere to working_memory config. " + "Currently %.0fMiB, %.0fMiB required." + % (working_memory, np.ceil(row_bytes * 2**-20)) + ) + chunk_n_rows = 1 + return chunk_n_rows diff --git a/sklearn/utils/tests/test_chunking.py b/sklearn/utils/tests/test_chunking.py new file mode 100644 index 0000000000000..10c7ed17a0c2d --- /dev/null +++ b/sklearn/utils/tests/test_chunking.py @@ -0,0 +1,73 @@ +import warnings +from itertools import chain + +import pytest + +from sklearn import config_context +from sklearn.utils._chunking import gen_even_slices, get_chunk_n_rows +from sklearn.utils._testing import assert_array_equal + + +def test_gen_even_slices(): + # check that gen_even_slices contains all samples + some_range = range(10) + joined_range = list(chain(*[some_range[slice] for slice in gen_even_slices(10, 3)])) + assert_array_equal(some_range, joined_range) + + +@pytest.mark.parametrize( + ("row_bytes", "max_n_rows", "working_memory", "expected"), + [ + (1024, None, 1, 1024), + (1024, None, 0.99999999, 1023), + (1023, None, 1, 1025), + (1025, None, 1, 1023), + (1024, None, 2, 2048), + (1024, 7, 1, 7), + (1024 * 1024, None, 1, 1), + ], +) +def test_get_chunk_n_rows(row_bytes, max_n_rows, working_memory, expected): + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + actual = get_chunk_n_rows( + row_bytes=row_bytes, + max_n_rows=max_n_rows, + working_memory=working_memory, + ) + + assert actual == expected + assert type(actual) is type(expected) + with config_context(working_memory=working_memory): + with warnings.catch_warnings(): + warnings.simplefilter("error", UserWarning) + actual = get_chunk_n_rows(row_bytes=row_bytes, max_n_rows=max_n_rows) + assert actual == expected + assert type(actual) is type(expected) + + +def test_get_chunk_n_rows_warns(): + """Check that warning is raised when working_memory is too low.""" + row_bytes = 1024 * 1024 + 1 + max_n_rows = None + working_memory = 1 + expected = 1 + + warn_msg = ( + "Could not adhere to working_memory config. Currently 1MiB, 2MiB required." 
+ ) + with pytest.warns(UserWarning, match=warn_msg): + actual = get_chunk_n_rows( + row_bytes=row_bytes, + max_n_rows=max_n_rows, + working_memory=working_memory, + ) + + assert actual == expected + assert type(actual) is type(expected) + + with config_context(working_memory=working_memory): + with pytest.warns(UserWarning, match=warn_msg): + actual = get_chunk_n_rows(row_bytes=row_bytes, max_n_rows=max_n_rows) + assert actual == expected + assert type(actual) is type(expected) diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 0a19d2a11b144..d87a4a57c6ac2 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -2,13 +2,11 @@ import timeit import warnings from copy import copy -from itertools import chain from unittest import SkipTest import numpy as np import pytest -from sklearn import config_context from sklearn.externals._packaging.version import parse as parse_version from sklearn.utils import ( _determine_key_type, @@ -22,8 +20,6 @@ check_random_state, column_or_1d, deprecated, - gen_even_slices, - get_chunk_n_rows, resample, safe_mask, shuffle, @@ -558,71 +554,6 @@ def test_shuffle_dont_convert_to_array(csc_container): assert_array_equal(e_s.toarray(), np.array([[4, 5], [2, 3], [0, 1]])) -def test_gen_even_slices(): - # check that gen_even_slices contains all samples - some_range = range(10) - joined_range = list(chain(*[some_range[slice] for slice in gen_even_slices(10, 3)])) - assert_array_equal(some_range, joined_range) - - -@pytest.mark.parametrize( - ("row_bytes", "max_n_rows", "working_memory", "expected"), - [ - (1024, None, 1, 1024), - (1024, None, 0.99999999, 1023), - (1023, None, 1, 1025), - (1025, None, 1, 1023), - (1024, None, 2, 2048), - (1024, 7, 1, 7), - (1024 * 1024, None, 1, 1), - ], -) -def test_get_chunk_n_rows(row_bytes, max_n_rows, working_memory, expected): - with warnings.catch_warnings(): - warnings.simplefilter("error", UserWarning) - actual = get_chunk_n_rows( - row_bytes=row_bytes, - max_n_rows=max_n_rows, - working_memory=working_memory, - ) - - assert actual == expected - assert type(actual) is type(expected) - with config_context(working_memory=working_memory): - with warnings.catch_warnings(): - warnings.simplefilter("error", UserWarning) - actual = get_chunk_n_rows(row_bytes=row_bytes, max_n_rows=max_n_rows) - assert actual == expected - assert type(actual) is type(expected) - - -def test_get_chunk_n_rows_warns(): - """Check that warning is raised when working_memory is too low.""" - row_bytes = 1024 * 1024 + 1 - max_n_rows = None - working_memory = 1 - expected = 1 - - warn_msg = ( - "Could not adhere to working_memory config. Currently 1MiB, 2MiB required." 
- ) - with pytest.warns(UserWarning, match=warn_msg): - actual = get_chunk_n_rows( - row_bytes=row_bytes, - max_n_rows=max_n_rows, - working_memory=working_memory, - ) - - assert actual == expected - assert type(actual) is type(expected) - - with config_context(working_memory=working_memory): - with pytest.warns(UserWarning, match=warn_msg): - actual = get_chunk_n_rows(row_bytes=row_bytes, max_n_rows=max_n_rows) - assert actual == expected - assert type(actual) is type(expected) - - @pytest.mark.parametrize( ["source", "message", "is_long"], [ From 128e40ed593c57e8b9e57a4109928d58fa8bf359 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 27 Feb 2024 13:36:32 +1100 Subject: [PATCH 12/23] Fix typo in comment in `TSNE` (#28539) --- sklearn/manifold/_t_sne.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py index 348f26e83592c..e3e804fb0257d 100644 --- a/sklearn/manifold/_t_sne.py +++ b/sklearn/manifold/_t_sne.py @@ -1151,7 +1151,7 @@ def fit_transform(self, X, y=None): Embedding of the training data in low-dimensional space. """ # TODO(1.7): remove - # Also make sure to change `max_iter` default back to 1 and deprecate None + # Also make sure to change `max_iter` default back to 1000 and deprecate None if self.n_iter != "deprecated": if self.max_iter is not None: raise ValueError( From c254b55cf98da707acb9bb10134a530aa6a50ea8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 27 Feb 2024 10:25:51 +0100 Subject: [PATCH 13/23] FIX typo fix, follow-up of #28160 (#28542) --- sklearn/datasets/_covtype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py index 1ecbd63ed7341..5d2055227141d 100644 --- a/sklearn/datasets/_covtype.py +++ b/sklearn/datasets/_covtype.py @@ -199,7 +199,7 @@ def fetch_covtype( with TemporaryDirectory(dir=covtype_dir) as temp_dir: logger.info(f"Downloading {ARCHIVE.url}") archive_path = _fetch_remote( - ARCHIVE, dirname=temp_dir, _retries=n_retries, delay=delay + ARCHIVE, dirname=temp_dir, n_retries=n_retries, delay=delay ) Xy = np.genfromtxt(GzipFile(filename=archive_path), delimiter=",") From 96305eed18f68af7b954ba0217ebf6cd46718fb1 Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Tue, 27 Feb 2024 14:23:57 +0100 Subject: [PATCH 14/23] DOC Add dropdowns to Module 2.3 Clustering (#26619) Co-authored-by: ArturoAmorQ Co-authored-by: Gael Varoquaux --- doc/modules/clustering.rst | 1176 +++++++++++++++++++----------------- 1 file changed, 626 insertions(+), 550 deletions(-) diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index c64b3d9d646c9..f435079a88ca8 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -259,17 +259,22 @@ threads, please refer to our :ref:`parallelism` notes. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`: Demonstrating when - k-means performs intuitively and when it does not - * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`: Clustering handwritten digits + * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_assumptions.py`: Demonstrating + when k-means performs intuitively and when it does not + * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`: Clustering + handwritten digits -.. 
topic:: References: - * `"k-means++: The advantages of careful seeding" - `_ - Arthur, David, and Sergei Vassilvitskii, - *Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete - algorithms*, Society for Industrial and Applied Mathematics (2007) +|details-start| +**References** +|details-split| + +* `"k-means++: The advantages of careful seeding" + `_ Arthur, David, and + Sergei Vassilvitskii, *Proceedings of the eighteenth annual ACM-SIAM symposium + on Discrete algorithms*, Society for Industrial and Applied Mathematics (2007) + +|details-end| .. _mini_batch_kmeans: @@ -313,15 +318,16 @@ small, as shown in the example and cited reference. * :ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`: Document clustering using :class:`KMeans` and :class:`MiniBatchKMeans` based on sparse data - * :ref:`sphx_glr_auto_examples_cluster_plot_dict_face_patches.py` +|details-start| +**References** +|details-split| +* `"Web Scale K-Means clustering" + `_ + D. Sculley, *Proceedings of the 19th international conference on World + wide web* (2010) -.. topic:: References: - - * `"Web Scale K-Means clustering" - `_ - D. Sculley, *Proceedings of the 19th international conference on World - wide web* (2010) +|details-end| .. _affinity_propagation: @@ -358,53 +364,57 @@ convergence. Further, the memory complexity is of the order sparse similarity matrix is used. This makes Affinity Propagation most appropriate for small to medium sized datasets. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`: Affinity - Propagation on a synthetic 2D datasets with 3 classes. - - * :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` Affinity Propagation on - Financial time series to find groups of companies +|details-start| +**Algorithm description** +|details-split| - -**Algorithm description:** The messages sent between points belong to one of two categories. The first is -the responsibility :math:`r(i, k)`, -which is the accumulated evidence that sample :math:`k` -should be the exemplar for sample :math:`i`. -The second is the availability :math:`a(i, k)` -which is the accumulated evidence that sample :math:`i` -should choose sample :math:`k` to be its exemplar, -and considers the values for all other samples that :math:`k` should -be an exemplar. In this way, exemplars are chosen by samples if they are (1) -similar enough to many samples and (2) chosen by many samples to be -representative of themselves. - -More formally, the responsibility of a sample :math:`k` -to be the exemplar of sample :math:`i` is given by: +the responsibility :math:`r(i, k)`, which is the accumulated evidence that +sample :math:`k` should be the exemplar for sample :math:`i`. The second is the +availability :math:`a(i, k)` which is the accumulated evidence that sample +:math:`i` should choose sample :math:`k` to be its exemplar, and considers the +values for all other samples that :math:`k` should be an exemplar. In this way, +exemplars are chosen by samples if they are (1) similar enough to many samples +and (2) chosen by many samples to be representative of themselves. + +More formally, the responsibility of a sample :math:`k` to be the exemplar of +sample :math:`i` is given by: .. math:: r(i, k) \leftarrow s(i, k) - max [ a(i, k') + s(i, k') \forall k' \neq k ] Where :math:`s(i, k)` is the similarity between samples :math:`i` and :math:`k`. 
-The availability of sample :math:`k` -to be the exemplar of sample :math:`i` is given by: +The availability of sample :math:`k` to be the exemplar of sample :math:`i` is +given by: .. math:: - a(i, k) \leftarrow min [0, r(k, k) + \sum_{i'~s.t.~i' \notin \{i, k\}}{r(i', k)}] + a(i, k) \leftarrow min [0, r(k, k) + \sum_{i'~s.t.~i' \notin \{i, k\}}{r(i', + k)}] -To begin with, all values for :math:`r` and :math:`a` are set to zero, -and the calculation of each iterates until convergence. -As discussed above, in order to avoid numerical oscillations when updating the -messages, the damping factor :math:`\lambda` is introduced to iteration process: +To begin with, all values for :math:`r` and :math:`a` are set to zero, and the +calculation of each iterates until convergence. As discussed above, in order to +avoid numerical oscillations when updating the messages, the damping factor +:math:`\lambda` is introduced to iteration process: .. math:: r_{t+1}(i, k) = \lambda\cdot r_{t}(i, k) + (1-\lambda)\cdot r_{t+1}(i, k) .. math:: a_{t+1}(i, k) = \lambda\cdot a_{t}(i, k) + (1-\lambda)\cdot a_{t+1}(i, k) where :math:`t` indicates the iteration times. +|details-end| + + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_cluster_plot_affinity_propagation.py`: Affinity + Propagation on a synthetic 2D datasets with 3 classes. + + * :ref:`sphx_glr_auto_examples_applications_plot_stock_market.py` Affinity + Propagation on Financial time series to find groups of companies + + .. _mean_shift: Mean Shift @@ -415,36 +425,43 @@ for centroids to be the mean of the points within a given region. These candidates are then filtered in a post-processing stage to eliminate near-duplicates to form the final set of centroids. -The position of centroid candidates is iteratively adjusted using a technique called hill -climbing, which finds local maxima of the estimated probability density. -Given a candidate centroid :math:`x` for iteration :math:`t`, the candidate -is updated according to the following equation: +|details-start| +**Mathematical details** +|details-split| + +The position of centroid candidates is iteratively adjusted using a technique +called hill climbing, which finds local maxima of the estimated probability +density. Given a candidate centroid :math:`x` for iteration :math:`t`, the +candidate is updated according to the following equation: .. math:: x^{t+1} = x^t + m(x^t) -Where :math:`m` is the *mean shift* vector that is computed for each -centroid that points towards a region of the maximum increase in the density of points. -To compute :math:`m` we define :math:`N(x)` as the neighborhood of samples within -a given distance around :math:`x`. Then :math:`m` is computed using the following -equation, effectively updating a centroid to be the mean of the samples within -its neighborhood: +Where :math:`m` is the *mean shift* vector that is computed for each centroid +that points towards a region of the maximum increase in the density of points. +To compute :math:`m` we define :math:`N(x)` as the neighborhood of samples +within a given distance around :math:`x`. Then :math:`m` is computed using the +following equation, effectively updating a centroid to be the mean of the +samples within its neighborhood: .. math:: m(x) = \frac{1}{|N(x)|} \sum_{x_j \in N(x)}x_j - x -In general, the equation for :math:`m` depends on a kernel used for density estimation. -The generic formula is: +In general, the equation for :math:`m` depends on a kernel used for density +estimation. The generic formula is: .. 
math:: - m(x) = \frac{\sum_{x_j \in N(x)}K(x_j - x)x_j}{\sum_{x_j \in N(x)}K(x_j - x)} - x + m(x) = \frac{\sum_{x_j \in N(x)}K(x_j - x)x_j}{\sum_{x_j \in N(x)}K(x_j - + x)} - x -In our implementation, :math:`K(x)` is equal to 1 if :math:`x` is small enough and is -equal to 0 otherwise. Effectively :math:`K(y - x)` indicates whether :math:`y` is in -the neighborhood of :math:`x`. +In our implementation, :math:`K(x)` is equal to 1 if :math:`x` is small enough +and is equal to 0 otherwise. Effectively :math:`K(y - x)` indicates whether +:math:`y` is in the neighborhood of :math:`x`. + +|details-end| The algorithm automatically sets the number of clusters, instead of relying on a parameter ``bandwidth``, which dictates the size of the region to search through. @@ -468,15 +485,19 @@ given sample. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_mean_shift.py`: Mean Shift clustering - on a synthetic 2D datasets with 3 classes. + * :ref:`sphx_glr_auto_examples_cluster_plot_mean_shift.py`: Mean Shift + clustering on a synthetic 2D datasets with 3 classes. -.. topic:: References: - * :doi:`"Mean shift: A robust approach toward feature space analysis" - <10.1109/34.1000236>` - D. Comaniciu and P. Meer, *IEEE Transactions on Pattern Analysis and Machine Intelligence* (2002) +|details-start| +**References** +|details-split| +* :doi:`"Mean shift: A robust approach toward feature space analysis" + <10.1109/34.1000236>` D. Comaniciu and P. Meer, *IEEE Transactions on Pattern + Analysis and Machine Intelligence* (2002) + +|details-end| .. _spectral_clustering: @@ -528,23 +549,24 @@ computed using a function of a gradient of the image. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_segmentation_toy.py`: Segmenting objects - from a noisy background using spectral clustering. + * :ref:`sphx_glr_auto_examples_cluster_plot_segmentation_toy.py`: Segmenting + objects from a noisy background using spectral clustering. - * :ref:`sphx_glr_auto_examples_cluster_plot_coin_segmentation.py`: Spectral clustering - to split the image of coins in regions. + * :ref:`sphx_glr_auto_examples_cluster_plot_coin_segmentation.py`: Spectral + clustering to split the image of coins in regions. .. |coin_kmeans| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_001.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 .. |coin_discretize| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_002.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 .. |coin_cluster_qr| image:: ../auto_examples/cluster/images/sphx_glr_plot_coin_segmentation_003.png - :target: ../auto_examples/cluster/plot_coin_segmentation.html - :scale: 35 + :target: ../auto_examples/cluster/plot_coin_segmentation.html + :scale: 35 + Different label assignment strategies ------------------------------------- @@ -566,14 +588,18 @@ below. |coin_kmeans| |coin_discretize| |coin_cluster_qr| ================================ ================================ ================================ -.. topic:: References: +|details-start| +**References** +|details-split| - * `"Multiclass spectral clustering" - `_ - Stella X. Yu, Jianbo Shi, 2003 +* `"Multiclass spectral clustering" + `_ + Stella X. 
Yu, Jianbo Shi, 2003 - * :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>` - Anil Damle, Victor Minden, Lexing Ying, 2019 +* :doi:`"Simple, direct, and efficient multi-way spectral clustering"<10.1093/imaiai/iay008>` + Anil Damle, Victor Minden, Lexing Ying, 2019 + +|details-end| .. _spectral_clustering_graph: @@ -589,28 +615,28 @@ graph, and SpectralClustering is initialized with `affinity='precomputed'`:: ... assign_labels='discretize') >>> sc.fit_predict(adjacency_matrix) # doctest: +SKIP -.. topic:: References: +|details-start| +**References** +|details-split| - * :doi:`"A Tutorial on Spectral Clustering" - <10.1007/s11222-007-9033-z>` - Ulrike von Luxburg, 2007 +* :doi:`"A Tutorial on Spectral Clustering" <10.1007/s11222-007-9033-z>` Ulrike + von Luxburg, 2007 - * :doi:`"Normalized cuts and image segmentation" - <10.1109/34.868688>` - Jianbo Shi, Jitendra Malik, 2000 +* :doi:`"Normalized cuts and image segmentation" <10.1109/34.868688>` Jianbo + Shi, Jitendra Malik, 2000 - * `"A Random Walks View of Spectral Segmentation" - `_ - Marina Meila, Jianbo Shi, 2001 +* `"A Random Walks View of Spectral Segmentation" + `_ + Marina Meila, Jianbo Shi, 2001 - * `"On Spectral Clustering: Analysis and an algorithm" - `_ - Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001 +* `"On Spectral Clustering: Analysis and an algorithm" + `_ + Andrew Y. Ng, Michael I. Jordan, Yair Weiss, 2001 - * :arxiv:`"Preconditioned Spectral Clustering for Stochastic - Block Partition Streaming Graph Challenge" - <1708.07481>` - David Zhuzhunashvili, Andrew Knyazev +* :arxiv:`"Preconditioned Spectral Clustering for Stochastic Block Partition + Streaming Graph Challenge" <1708.07481>` David Zhuzhunashvili, Andrew Knyazev + +|details-end| .. _hierarchical_clustering: @@ -673,8 +699,9 @@ Single linkage can also perform well on non-globular data. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_digits_linkage.py`: exploration of the - different linkage strategies in a real dataset. + * :ref:`sphx_glr_auto_examples_cluster_plot_digits_linkage.py`: exploration of + the different linkage strategies in a real dataset. + Visualization of cluster hierarchy ---------------------------------- @@ -688,7 +715,6 @@ of the data, though more so in the case of small sample sizes. :scale: 42 - Adding connectivity constraints ------------------------------- @@ -728,21 +754,6 @@ using :func:`sklearn.feature_extraction.image.grid_to_graph` to enable only merging of neighboring pixels on an image, as in the :ref:`coin ` example. -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_cluster_plot_coin_ward_segmentation.py`: Ward clustering - to split the image of coins in regions. - - * :ref:`sphx_glr_auto_examples_cluster_plot_ward_structured_vs_unstructured.py`: Example of - Ward algorithm on a swiss-roll, comparison of structured approaches - versus unstructured approaches. - - * :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py`: - Example of dimensionality reduction with feature agglomeration based on - Ward hierarchical clustering. - - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering.py` - .. warning:: **Connectivity constraints with single, average and complete linkage** Connectivity constraints and single, complete or average linkage can enhance @@ -770,6 +781,21 @@ enable only merging of neighboring pixels on an image, as in the :target: ../auto_examples/cluster/plot_agglomerative_clustering.html :scale: 38 +.. 
topic:: Examples: + + * :ref:`sphx_glr_auto_examples_cluster_plot_coin_ward_segmentation.py`: Ward + clustering to split the image of coins in regions. + + * :ref:`sphx_glr_auto_examples_cluster_plot_ward_structured_vs_unstructured.py`: Example + of Ward algorithm on a swiss-roll, comparison of structured approaches + versus unstructured approaches. + + * :ref:`sphx_glr_auto_examples_cluster_plot_feature_agglomeration_vs_univariate_selection.py`: Example + of dimensionality reduction with feature agglomeration based on Ward + hierarchical clustering. + + * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering.py` + Varying the metric ------------------- @@ -804,7 +830,8 @@ each class. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py` + * :ref:`sphx_glr_auto_examples_cluster_plot_agglomerative_clustering_metrics.py` + Bisecting K-Means ----------------- @@ -847,24 +874,26 @@ Difference between Bisecting K-Means and regular K-Means can be seen on example While the regular K-Means algorithm tends to create non-related clusters, clusters from Bisecting K-Means are well ordered and create quite a visible hierarchy. -.. topic:: References: - - * `"A Comparison of Document Clustering Techniques" - `_ - Michael Steinbach, George Karypis and Vipin Kumar, - Department of Computer Science and Egineering, University of Minnesota - (June 2000) - * `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog Data" - `_ - K.Abirami and Dr.P.Mayilvahanan, - International Journal of Emerging Technologies in Engineering Research (IJETER) - Volume 4, Issue 8, (August 2016) - * `"Bisecting K-means Algorithm Based on K-valued Self-determining - and Clustering Center Optimization" - `_ - Jian Di, Xinyue Gou - School of Control and Computer Engineering,North China Electric Power University, - Baoding, Hebei, China (August 2017) +|details-start| +**References** +|details-split| + +* `"A Comparison of Document Clustering Techniques" + `_ Michael + Steinbach, George Karypis and Vipin Kumar, Department of Computer Science and + Egineering, University of Minnesota (June 2000) +* `"Performance Analysis of K-Means and Bisecting K-Means Algorithms in Weblog + Data" + `_ + K.Abirami and Dr.P.Mayilvahanan, International Journal of Emerging + Technologies in Engineering Research (IJETER) Volume 4, Issue 8, (August 2016) +* `"Bisecting K-means Algorithm Based on K-valued Self-determining and + Clustering Center Optimization" + `_ Jian Di, Xinyue Gou School + of Control and Computer Engineering,North China Electric Power University, + Baoding, Hebei, China (August 2017) + +|details-end| .. _dbscan: @@ -927,62 +956,70 @@ by black points below. * :ref:`sphx_glr_auto_examples_cluster_plot_dbscan.py` -.. topic:: Implementation - - The DBSCAN algorithm is deterministic, always generating the same clusters - when given the same data in the same order. However, the results can differ when - data is provided in a different order. First, even though the core samples - will always be assigned to the same clusters, the labels of those clusters - will depend on the order in which those samples are encountered in the data. - Second and more importantly, the clusters to which non-core samples are assigned - can differ depending on the data order. This would happen when a non-core sample - has a distance lower than ``eps`` to two core samples in different clusters. 
By the - triangular inequality, those two core samples must be more distant than - ``eps`` from each other, or they would be in the same cluster. The non-core - sample is assigned to whichever cluster is generated first in a pass - through the data, and so the results will depend on the data ordering. - - The current implementation uses ball trees and kd-trees - to determine the neighborhood of points, - which avoids calculating the full distance matrix - (as was done in scikit-learn versions before 0.14). - The possibility to use custom metrics is retained; - for details, see :class:`~sklearn.neighbors.NearestNeighbors`. - -.. topic:: Memory consumption for large sample sizes - - This implementation is by default not memory efficient because it constructs - a full pairwise similarity matrix in the case where kd-trees or ball-trees cannot - be used (e.g., with sparse matrices). This matrix will consume :math:`n^2` floats. - A couple of mechanisms for getting around this are: - - - Use :ref:`OPTICS ` clustering in conjunction with the - `extract_dbscan` method. OPTICS clustering also calculates the full - pairwise matrix, but only keeps one row in memory at a time (memory - complexity n). - - - A sparse radius neighborhood graph (where missing entries are presumed to - be out of eps) can be precomputed in a memory-efficient way and dbscan - can be run over this with ``metric='precomputed'``. See - :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors_graph`. - - - The dataset can be compressed, either by removing exact duplicates if - these occur in your data, or by using BIRCH. Then you only have a - relatively small number of representatives for a large number of points. - You can then provide a ``sample_weight`` when fitting DBSCAN. - -.. topic:: References: - - * `"A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases - with Noise" `_ - Ester, M., H. P. Kriegel, J. Sander, and X. Xu, - In Proceedings of the 2nd International Conference on Knowledge Discovery - and Data Mining, Portland, OR, AAAI Press, pp. 226–231. 1996 - - * :doi:`"DBSCAN revisited, revisited: why and how you should (still) use DBSCAN." - <10.1145/3068335>` - Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017). - In ACM Transactions on Database Systems (TODS), 42(3), 19. +|details-start| +**Implementation** +|details-split| + +The DBSCAN algorithm is deterministic, always generating the same clusters when +given the same data in the same order. However, the results can differ when +data is provided in a different order. First, even though the core samples will +always be assigned to the same clusters, the labels of those clusters will +depend on the order in which those samples are encountered in the data. Second +and more importantly, the clusters to which non-core samples are assigned can +differ depending on the data order. This would happen when a non-core sample +has a distance lower than ``eps`` to two core samples in different clusters. By +the triangular inequality, those two core samples must be more distant than +``eps`` from each other, or they would be in the same cluster. The non-core +sample is assigned to whichever cluster is generated first in a pass through the +data, and so the results will depend on the data ordering. + +The current implementation uses ball trees and kd-trees to determine the +neighborhood of points, which avoids calculating the full distance matrix (as +was done in scikit-learn versions before 0.14). 
The possibility to use custom
+metrics is retained; for details, see :class:`~sklearn.neighbors.NearestNeighbors`.
+
+|details-end|
+
+|details-start|
+**Memory consumption for large sample sizes**
+|details-split|
+
+This implementation is by default not memory efficient because it constructs a
+full pairwise similarity matrix in the case where kd-trees or ball-trees cannot
+be used (e.g., with sparse matrices). This matrix will consume :math:`n^2`
+floats. A couple of mechanisms for getting around this are:
+
+- Use :ref:`OPTICS <optics>` clustering in conjunction with the `extract_dbscan`
+  method. OPTICS clustering also calculates the full pairwise matrix, but only
+  keeps one row in memory at a time (memory complexity n).
+
+- A sparse radius neighborhood graph (where missing entries are presumed to be
+  out of ``eps``) can be precomputed in a memory-efficient way and DBSCAN can be
+  run over this with ``metric='precomputed'``. See
+  :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors_graph`.
+
+- The dataset can be compressed, either by removing exact duplicates if these
+  occur in your data, or by using BIRCH. Then you only have a relatively small
+  number of representatives for a large number of points. You can then provide a
+  ``sample_weight`` when fitting DBSCAN.
+
+|details-end|
+
+|details-start|
+**References**
+|details-split|
+
+* `A Density-Based Algorithm for Discovering Clusters in Large Spatial
+  Databases with Noise `_
+  Ester, M., H. P. Kriegel, J. Sander, and X. Xu, In Proceedings of the 2nd
+  International Conference on Knowledge Discovery and Data Mining, Portland, OR,
+  AAAI Press, pp. 226–231. 1996
+
+* :doi:`DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.
+  <10.1145/3068335>` Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu,
+  X. (2017). In ACM Transactions on Database Systems (TODS), 42(3), 19.
+
+|details-end|
 
 .. _hdbscan:
 
@@ -1078,17 +1115,17 @@ simplify the hyperparameter space.
 
 .. topic:: References:
 
-    .. [CM2013] Campello, R.J.G.B., Moulavi, D., Sander, J. (2013). Density-Based Clustering
-      Based on Hierarchical Density Estimates. In: Pei, J., Tseng, V.S., Cao, L.,
-      Motoda, H., Xu, G. (eds) Advances in Knowledge Discovery and Data Mining.
-      PAKDD 2013. Lecture Notes in Computer Science(), vol 7819. Springer, Berlin,
-      Heidelberg.
-      :doi:`Density-Based Clustering Based on Hierarchical Density Estimates <10.1007/978-3-642-37456-2_14>`
+    .. [CM2013] Campello, R.J.G.B., Moulavi, D., Sander, J. (2013). Density-Based
+       Clustering Based on Hierarchical Density Estimates. In: Pei, J., Tseng, V.S.,
+       Cao, L., Motoda, H., Xu, G. (eds) Advances in Knowledge Discovery and Data
+       Mining. PAKDD 2013. Lecture Notes in Computer Science(), vol 7819. Springer,
+       Berlin, Heidelberg. :doi:`Density-Based Clustering Based on Hierarchical
+       Density Estimates <10.1007/978-3-642-37456-2_14>`
 
-    .. [LJ2017] L. McInnes and J. Healy, (2017). Accelerated Hierarchical Density Based
-      Clustering. In: IEEE International Conference on Data Mining Workshops (ICDMW),
-      2017, pp. 33-42.
-      :doi:`Accelerated Hierarchical Density Based Clustering <10.1109/ICDMW.2017.12>`
+    .. [LJ2017] L. McInnes and J. Healy, (2017). Accelerated Hierarchical Density
+       Based Clustering. In: IEEE International Conference on Data Mining Workshops
+       (ICDMW), 2017, pp. 33-42. :doi:`Accelerated Hierarchical Density Based
+       Clustering <10.1109/ICDMW.2017.12>`
 
 .. _optics:
 
@@ -1136,45 +1173,56 @@ represented as children of a larger parent cluster.
 
 ..
topic:: Examples:
 
-   * :ref:`sphx_glr_auto_examples_cluster_plot_optics.py`
+   * :ref:`sphx_glr_auto_examples_cluster_plot_optics.py`
 
-.. topic:: Comparison with DBSCAN
+|details-start|
+**Comparison with DBSCAN**
+|details-split|
 
-   The results from OPTICS ``cluster_optics_dbscan`` method and DBSCAN are
-   very similar, but not always identical; specifically, labeling of periphery
-   and noise points. This is in part because the first samples of each dense
-   area processed by OPTICS have a large reachability value while being close
-   to other points in their area, and will thus sometimes be marked as noise
-   rather than periphery. This affects adjacent points when they are
-   considered as candidates for being marked as either periphery or noise.
+The results from the OPTICS ``cluster_optics_dbscan`` method and DBSCAN are very
+similar, but not always identical; specifically, in the labeling of periphery and
+noise points. This is in part because the first samples of each dense area processed
+by OPTICS have a large reachability value while being close to other points in
+their area, and will thus sometimes be marked as noise rather than periphery.
+This affects adjacent points when they are considered as candidates for being
+marked as either periphery or noise.
 
-   Note that for any single value of ``eps``, DBSCAN will tend to have a
-   shorter run time than OPTICS; however, for repeated runs at varying ``eps``
-   values, a single run of OPTICS may require less cumulative runtime than
-   DBSCAN. It is also important to note that OPTICS' output is close to
-   DBSCAN's only if ``eps`` and ``max_eps`` are close.
+Note that for any single value of ``eps``, DBSCAN will tend to have a shorter
+run time than OPTICS; however, for repeated runs at varying ``eps`` values, a
+single run of OPTICS may require less cumulative runtime than DBSCAN. It is also
+important to note that OPTICS' output is close to DBSCAN's only if ``eps`` and
+``max_eps`` are close.
 
-.. topic:: Computational Complexity
+|details-end|
 
+|details-start|
+**Computational Complexity**
+|details-split|
 
-   Spatial indexing trees are used to avoid calculating the full distance
-   matrix, and allow for efficient memory usage on large sets of samples.
-   Different distance metrics can be supplied via the ``metric`` keyword.
+Spatial indexing trees are used to avoid calculating the full distance matrix,
+and allow for efficient memory usage on large sets of samples. Different
+distance metrics can be supplied via the ``metric`` keyword.
 
-   For large datasets, similar (but not identical) results can be obtained via
-   :class:`HDBSCAN`. The HDBSCAN implementation is
-   multithreaded, and has better algorithmic runtime complexity than OPTICS,
-   at the cost of worse memory scaling. For extremely large datasets that
-   exhaust system memory using HDBSCAN, OPTICS will maintain :math:`n` (as opposed
-   to :math:`n^2`) memory scaling; however, tuning of the ``max_eps`` parameter
-   will likely need to be used to give a solution in a reasonable amount of
-   wall time.
 
-.. topic:: References:
+For large datasets, similar (but not identical) results can be obtained via
+:class:`HDBSCAN`. The HDBSCAN implementation is multithreaded, and has better
+algorithmic runtime complexity than OPTICS, at the cost of worse memory scaling.
+For extremely large datasets that exhaust system memory using HDBSCAN, OPTICS
+will maintain :math:`n` (as opposed to :math:`n^2`) memory scaling; however,
+the ``max_eps`` parameter will likely need to be tuned to give a solution in a
+reasonable amount of wall time.
+
+|details-end|
 
-   * "OPTICS: ordering points to identify the clustering structure."
-     Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander.
-     In ACM Sigmod Record, vol. 28, no. 2, pp. 49-60. ACM, 1999.
+|details-start|
+**References**
+|details-split|
+
+* "OPTICS: ordering points to identify the clustering structure." Ankerst,
+  Mihael, Markus M. Breunig, Hans-Peter Kriegel, and Jörg Sander. In ACM Sigmod
+  Record, vol. 28, no. 2, pp. 49-60. ACM, 1999.
+
+|details-end|
 
 .. _birch:
 
@@ -1210,28 +1258,35 @@ If ``n_clusters`` is set to None, the subclusters from the leaves are directly
 read off, otherwise a global clustering step labels these subclusters into global
 clusters (labels) and the samples are mapped to the global label of the nearest subcluster.
 
-**Algorithm description:**
+|details-start|
+**Algorithm description**
+|details-split|
 
-- A new sample is inserted into the root of the CF Tree which is a CF Node.
-  It is then merged with the subcluster of the root, that has the smallest
-  radius after merging, constrained by the threshold and branching factor conditions.
-  If the subcluster has any child node, then this is done repeatedly till it reaches
-  a leaf. After finding the nearest subcluster in the leaf, the properties of this
-  subcluster and the parent subclusters are recursively updated.
+- A new sample is inserted into the root of the CF Tree which is a CF Node. It
+  is then merged with the subcluster of the root that has the smallest radius
+  after merging, constrained by the threshold and branching factor conditions.
+  If the subcluster has any child node, then this is done repeatedly till it
+  reaches a leaf. After finding the nearest subcluster in the leaf, the
+  properties of this subcluster and the parent subclusters are recursively
+  updated.
 
 - If the radius of the subcluster obtained by merging the new sample and the
   nearest subcluster is greater than the square of the threshold and if the
-  number of subclusters is greater than the branching factor, then a space is temporarily
-  allocated to this new sample. The two farthest subclusters are taken and
-  the subclusters are divided into two groups on the basis of the distance
-  between these subclusters.
+  number of subclusters is greater than the branching factor, then a space is
+  temporarily allocated to this new sample. The two farthest subclusters are
+  taken and the subclusters are divided into two groups on the basis of the
+  distance between these subclusters.
+
+- If this split node has a parent subcluster and there is room for a new
+  subcluster, then the parent is split into two. If there is no room, then this
+  node is again split into two and the process is continued recursively, till it
+  reaches the root.
 
-- If this split node has a parent subcluster and there is room
-  for a new subcluster, then the parent is split into two. If there is no room,
-  then this node is again split into two and the process is continued
-  recursively, till it reaches the root.
+|details-end|
 
+|details-start|
 **BIRCH or MiniBatchKMeans?**
+|details-split|
 
 - BIRCH does not scale very well to high dimensional data. As a rule of thumb if
   ``n_features`` is greater than twenty, it is generally better to use MiniBatchKMeans.
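A rough sketch of this rule of thumb on toy data (the sample sizes, feature
counts and cluster counts below are arbitrary illustration choices, not tuned
recommendations)::

    from sklearn.cluster import Birch, MiniBatchKMeans
    from sklearn.datasets import make_blobs

    # Few features: BIRCH can first summarize the data into CF subclusters.
    X_low, _ = make_blobs(n_samples=10_000, n_features=10, random_state=0)
    birch = Birch(n_clusters=3).fit(X_low)

    # Many features (more than ~20): the rule of thumb favors MiniBatchKMeans.
    X_high, _ = make_blobs(n_samples=10_000, n_features=50, random_state=0)
    mbk = MiniBatchKMeans(n_clusters=3, n_init=3, random_state=0).fit(X_high)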
@@ -1239,8 +1294,14 @@ clusters (labels) and the samples are mapped to the global label of the nearest
   large number of subclusters either as a preprocessing step or otherwise, BIRCH
   is more useful than MiniBatchKMeans.
 
+.. image:: ../auto_examples/cluster/images/sphx_glr_plot_birch_vs_minibatchkmeans_001.png
+   :target: ../auto_examples/cluster/plot_birch_vs_minibatchkmeans.html
+
+|details-end|
 
+|details-start|
 **How to use partial_fit?**
+|details-split|
 
 To avoid the computation of global clustering, for every call of ``partial_fit``
 the user is advised
 
@@ -1252,18 +1313,20 @@ the user is advised
 4. Call ``partial_fit`` finally with no arguments, i.e. ``brc.partial_fit()``
    which performs the global clustering.
 
-.. image:: ../auto_examples/cluster/images/sphx_glr_plot_birch_vs_minibatchkmeans_001.png
-   :target: ../auto_examples/cluster/plot_birch_vs_minibatchkmeans.html
+|details-end|
 
-.. topic:: References:
+|details-start|
+**References**
+|details-split|
+
+* Tian Zhang, Raghu Ramakrishnan, Miron Livny. BIRCH: An efficient data
+  clustering method for large databases.
+  https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf
 
-  * Tian Zhang, Raghu Ramakrishnan, Maron Livny
-    BIRCH: An efficient data clustering method for large databases.
-    https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf
+* Roberto Perdisci. JBirch - Java implementation of BIRCH clustering algorithm
+  https://code.google.com/archive/p/jbirch
 
-  * Roberto Perdisci
-    JBirch - Java implementation of BIRCH clustering algorithm
-    https://code.google.com/archive/p/jbirch
+|details-end|
 
 .. _clustering_evaluation:
 
@@ -1346,105 +1409,104 @@ will not necessarily be close to zero.::
 
    -0.07...
 
-Advantages
-~~~~~~~~~~
+.. topic:: Advantages:
 
-- **Interpretability**: The unadjusted Rand index is proportional
-  to the number of sample pairs whose labels are the same in both
-  `labels_pred` and `labels_true`, or are different in both.
+  - **Interpretability**: The unadjusted Rand index is proportional to the
+    number of sample pairs whose labels are the same in both `labels_pred` and
+    `labels_true`, or are different in both.
 
-- **Random (uniform) label assignments have an adjusted Rand index
-  score close to 0.0** for any value of ``n_clusters`` and
-  ``n_samples`` (which is not the case for the unadjusted Rand index
-  or the V-measure for instance).
+  - **Random (uniform) label assignments have an adjusted Rand index score close
+    to 0.0** for any value of ``n_clusters`` and ``n_samples`` (which is not the
+    case for the unadjusted Rand index or the V-measure for instance).
 
-- **Bounded range**: Lower values indicate different labelings,
-  similar clusterings have a high (adjusted or unadjusted) Rand index,
-  1.0 is the perfect match score. The score range is [0, 1] for the
-  unadjusted Rand index and [-1, 1] for the adjusted Rand index.
+  - **Bounded range**: Lower values indicate different labelings, similar
+    clusterings have a high (adjusted or unadjusted) Rand index, 1.0 is the
+    perfect match score. The score range is [0, 1] for the unadjusted Rand index
+    and [-1, 1] for the adjusted Rand index.
 
-- **No assumption is made on the cluster structure**: The (adjusted or
-  unadjusted) Rand index can be used to compare all kinds of
-  clustering algorithms, and can be used to compare clustering
-  algorithms such as k-means which assumes isotropic blob shapes with
-  results of spectral clustering algorithms which can find cluster
-  with "folded" shapes.
+  - **No assumption is made on the cluster structure**: The (adjusted or
+    unadjusted) Rand index can be used to compare all kinds of clustering
+    algorithms, and can be used to compare clustering algorithms such as k-means
+    which assumes isotropic blob shapes with results of spectral clustering
+    algorithms which can find clusters with "folded" shapes.
 
+.. topic:: Drawbacks:
 
-Drawbacks
-~~~~~~~~~
+  - Contrary to inertia, the **(adjusted or unadjusted) Rand index requires
+    knowledge of the ground truth classes** which is almost never available in
+    practice or requires manual assignment by human annotators (as in the
+    supervised learning setting).
 
-- Contrary to inertia, the **(adjusted or unadjusted) Rand index
-  requires knowledge of the ground truth classes** which is almost
-  never available in practice or requires manual assignment by human
-  annotators (as in the supervised learning setting).
+    However, the (adjusted or unadjusted) Rand index can also be useful in a
+    purely unsupervised setting as a building block for a Consensus Index that
+    can be used for clustering model selection (TODO).
 
-  However (adjusted or unadjusted) Rand index can also be useful in a
-  purely unsupervised setting as a building block for a Consensus
-  Index that can be used for clustering model selection (TODO).
-
-- The **unadjusted Rand index is often close to 1.0** even if the
-  clusterings themselves differ significantly. This can be understood
-  when interpreting the Rand index as the accuracy of element pair
-  labeling resulting from the clusterings: In practice there often is
-  a majority of element pairs that are assigned the ``different`` pair
-  label under both the predicted and the ground truth clustering
-  resulting in a high proportion of pair labels that agree, which
-  leads subsequently to a high score.
+  - The **unadjusted Rand index is often close to 1.0** even if the clusterings
+    themselves differ significantly. This can be understood when interpreting
+    the Rand index as the accuracy of element pair labeling resulting from the
+    clusterings: In practice there often is a majority of element pairs that are
+    assigned the ``different`` pair label under both the predicted and the
+    ground truth clustering resulting in a high proportion of pair labels that
+    agree, which leads subsequently to a high score.
 
 .. topic:: Examples:
 
-  * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`:
-    Analysis of the impact of the dataset size on the value of
-    clustering measures for random assignments.
+  * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`:
+    Analysis of the impact of the dataset size on the value of clustering measures
+    for random assignments.
 
-Mathematical formulation
-~~~~~~~~~~~~~~~~~~~~~~~~
+|details-start|
+**Mathematical formulation**
+|details-split|
 
-If C is a ground truth class assignment and K the clustering, let us
-define :math:`a` and :math:`b` as:
+If C is a ground truth class assignment and K the clustering, let us define
+:math:`a` and :math:`b` as:
 
-- :math:`a`, the number of pairs of elements that are in the same set
-  in C and in the same set in K
+- :math:`a`, the number of pairs of elements that are in the same set in C and
+  in the same set in K
 
-- :math:`b`, the number of pairs of elements that are in different sets
-  in C and in different sets in K
+- :math:`b`, the number of pairs of elements that are in different sets in C and
+  in different sets in K
 
 The unadjusted Rand index is then given by:
 
 ..
math:: \text{RI} = \frac{a + b}{C_2^{n_{samples}}}
 
-where :math:`C_2^{n_{samples}}` is the total number of possible pairs
-in the dataset. It does not matter if the calculation is performed on
-ordered pairs or unordered pairs as long as the calculation is
-performed consistently.
+where :math:`C_2^{n_{samples}}` is the total number of possible pairs in the
+dataset. It does not matter if the calculation is performed on ordered pairs or
+unordered pairs as long as the calculation is performed consistently.
 
-However, the Rand index does not guarantee that random label assignments
-will get a value close to zero (esp. if the number of clusters is in
-the same order of magnitude as the number of samples).
+However, the Rand index does not guarantee that random label assignments will
+get a value close to zero (esp. if the number of clusters is in the same order
+of magnitude as the number of samples).
 
 To counter this effect we can discount the expected RI :math:`E[\text{RI}]` of
 random labelings by defining the adjusted Rand index as follows:
 
 .. math:: \text{ARI} = \frac{\text{RI} - E[\text{RI}]}{\max(\text{RI}) - E[\text{RI}]}
 
-.. topic:: References
+|details-end|
+
+|details-start|
+**References**
+|details-split|
 
-  * `Comparing Partitions
-    `_
-    L. Hubert and P. Arabie, Journal of Classification 1985
+* `Comparing Partitions
+  `_ L. Hubert and P.
+  Arabie, Journal of Classification 1985
 
-  * `Properties of the Hubert-Arabie adjusted Rand index
-    `_
-    D. Steinley, Psychological Methods 2004
+* `Properties of the Hubert-Arabie adjusted Rand index
+  `_ D. Steinley, Psychological
+  Methods 2004
 
-  * `Wikipedia entry for the Rand index
-    `_
+* `Wikipedia entry for the Rand index
+  `_
 
-  * `Wikipedia entry for the adjusted Rand index
-    `_
+* `Wikipedia entry for the adjusted Rand index
+  `_
 
+|details-end|
 
 .. _mutual_info_score:
 
@@ -1502,44 +1564,39 @@ Bad (e.g. independent labelings) have non-positive scores::
 
    -0.10526...
 
-Advantages
-~~~~~~~~~~
-
-- **Random (uniform) label assignments have a AMI score close to 0.0**
-  for any value of ``n_clusters`` and ``n_samples`` (which is not the
-  case for raw Mutual Information or the V-measure for instance).
+.. topic:: Advantages:
 
-- **Upper bound of 1**: Values close to zero indicate two label
-  assignments that are largely independent, while values close to one
-  indicate significant agreement. Further, an AMI of exactly 1 indicates
-  that the two label assignments are equal (with or without permutation).
+  - **Random (uniform) label assignments have an AMI score close to 0.0** for any
+    value of ``n_clusters`` and ``n_samples`` (which is not the case for raw
+    Mutual Information or the V-measure for instance).
 
+  - **Upper bound of 1**: Values close to zero indicate two label assignments
+    that are largely independent, while values close to one indicate significant
+    agreement. Further, an AMI of exactly 1 indicates that the two label
+    assignments are equal (with or without permutation).
 
-Drawbacks
-~~~~~~~~~
+.. topic:: Drawbacks:
 
-- Contrary to inertia, **MI-based measures require the knowledge
-  of the ground truth classes** while almost never available in practice or
-  requires manual assignment by human annotators (as in the supervised learning
-  setting).
+  - Contrary to inertia, **MI-based measures require the knowledge of the ground
+    truth classes**, which are almost never available in practice or require
+    manual assignment by human annotators (as in the supervised learning setting).
- However MI-based measures can also be useful in purely unsupervised setting as a
-   building block for a Consensus Index that can be used for clustering
-   model selection.
-
-- NMI and MI are not adjusted against chance.
+    However, MI-based measures can also be useful in a purely unsupervised
+    setting as a building block for a Consensus Index that can be used for
+    clustering model selection.
 
+  - NMI and MI are not adjusted against chance.
 
 .. topic:: Examples:
 
-  * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis of
-    the impact of the dataset size on the value of clustering measures
-    for random assignments. This example also includes the Adjusted Rand
-    Index.
+  * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis
+    of the impact of the dataset size on the value of clustering measures for
+    random assignments. This example also includes the Adjusted Rand Index.
 
-Mathematical formulation
-~~~~~~~~~~~~~~~~~~~~~~~~
+|details-start|
+**Mathematical formulation**
+|details-split|
 
 Assume two label assignments (of the same N objects), :math:`U` and :math:`V`.
 Their entropy is the amount of uncertainty for a partition set, defined by:
 
@@ -1573,63 +1630,62 @@ adjusted for chance and will tend to increase as the number of different labels
 between the label assignments.
 
 The expected value for the mutual information can be calculated using the
-following equation [VEB2009]_. In this equation,
-:math:`a_i = |U_i|` (the number of elements in :math:`U_i`) and
-:math:`b_j = |V_j|` (the number of elements in :math:`V_j`).
-
+following equation [VEB2009]_. In this equation, :math:`a_i = |U_i|` (the number
+of elements in :math:`U_i`) and :math:`b_j = |V_j|` (the number of elements in
+:math:`V_j`).
 
 .. math:: E[\text{MI}(U,V)]=\sum_{i=1}^{|U|} \sum_{j=1}^{|V|} \sum_{n_{ij}=(a_i+b_j-N)^+
-   }^{\min(a_i, b_j)} \frac{n_{ij}}{N}\log \left( \frac{ N.n_{ij}}{a_i b_j}\right)
-   \frac{a_i!b_j!(N-a_i)!(N-b_j)!}{N!n_{ij}!(a_i-n_{ij})!(b_j-n_{ij})!
-   (N-a_i-b_j+n_{ij})!}
+   }^{\min(a_i, b_j)} \frac{n_{ij}}{N}\log \left( \frac{ N.n_{ij}}{a_i b_j}\right)
+   \frac{a_i!b_j!(N-a_i)!(N-b_j)!}{N!n_{ij}!(a_i-n_{ij})!(b_j-n_{ij})!
+   (N-a_i-b_j+n_{ij})!}
 
-Using the expected value, the adjusted mutual information can then be
-calculated using a similar form to that of the adjusted Rand index:
+Using the expected value, the adjusted mutual information can then be calculated
+using a similar form to that of the adjusted Rand index:
 
 .. math:: \text{AMI} = \frac{\text{MI} - E[\text{MI}]}{\text{mean}(H(U), H(V)) - E[\text{MI}]}
 
-For normalized mutual information and adjusted mutual information, the normalizing
-value is typically some *generalized* mean of the entropies of each clustering.
-Various generalized means exist, and no firm rules exist for preferring one over the
-others. The decision is largely a field-by-field basis; for instance, in community
-detection, the arithmetic mean is most common. Each
-normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In our
-implementation, this is controlled by the ``average_method`` parameter.
+For normalized mutual information and adjusted mutual information, the
+normalizing value is typically some *generalized* mean of the entropies of each
+clustering. Various generalized means exist, and no firm rules exist for
+preferring one over the others. The decision is made largely on a field-by-field
+basis; for instance, in community detection, the arithmetic mean is most common.
Each +normalizing method provides "qualitatively similar behaviours" [YAT2016]_. In +our implementation, this is controlled by the ``average_method`` parameter. -Vinh et al. (2010) named variants of NMI and AMI by their averaging method [VEB2010]_. Their -'sqrt' and 'sum' averages are the geometric and arithmetic means; we use these -more broadly common names. +Vinh et al. (2010) named variants of NMI and AMI by their averaging method +[VEB2010]_. Their 'sqrt' and 'sum' averages are the geometric and arithmetic +means; we use these more broadly common names. -.. topic:: References - - * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles – a - knowledge reuse framework for combining multiple partitions". Journal of - Machine Learning Research 3: 583–617. - `doi:10.1162/153244303321897735 `_. +.. topic:: References: - * `Wikipedia entry for the (normalized) Mutual Information - `_ + * Strehl, Alexander, and Joydeep Ghosh (2002). "Cluster ensembles – a + knowledge reuse framework for combining multiple partitions". Journal of + Machine Learning Research 3: 583–617. `doi:10.1162/153244303321897735 + `_. - * `Wikipedia entry for the Adjusted Mutual Information - `_ + * `Wikipedia entry for the (normalized) Mutual Information + `_ - .. [VEB2009] Vinh, Epps, and Bailey, (2009). "Information theoretic measures - for clusterings comparison". Proceedings of the 26th Annual International - Conference on Machine Learning - ICML '09. - `doi:10.1145/1553374.1553511 `_. - ISBN 9781605585161. + * `Wikipedia entry for the Adjusted Mutual Information + `_ - .. [VEB2010] Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures for - Clusterings Comparison: Variants, Properties, Normalization and - Correction for Chance". JMLR - + .. [VEB2009] Vinh, Epps, and Bailey, (2009). "Information theoretic measures + for clusterings comparison". Proceedings of the 26th Annual International + Conference on Machine Learning - ICML '09. `doi:10.1145/1553374.1553511 + `_. ISBN + 9781605585161. - .. [YAT2016] Yang, Algesheimer, and Tessone, (2016). "A comparative analysis of - community - detection algorithms on artificial networks". Scientific Reports 6: 30750. - `doi:10.1038/srep30750 `_. + .. [VEB2010] Vinh, Epps, and Bailey, (2010). "Information Theoretic Measures + for Clusterings Comparison: Variants, Properties, Normalization and + Correction for Chance". JMLR + + .. [YAT2016] Yang, Algesheimer, and Tessone, (2016). "A comparative analysis + of community detection algorithms on artificial networks". Scientific + Reports 6: 30750. `doi:10.1038/srep30750 + `_. +|details-end| .. _homogeneity_completeness: @@ -1711,55 +1767,52 @@ homogeneous but not complete:: homogeneity_score(a, b) == completeness_score(b, a) -Advantages -~~~~~~~~~~ - -- **Bounded scores**: 0.0 is as bad as it can be, 1.0 is a perfect score. +.. topic:: Advantages: -- Intuitive interpretation: clustering with bad V-measure can be - **qualitatively analyzed in terms of homogeneity and completeness** - to better feel what 'kind' of mistakes is done by the assignment. + - **Bounded scores**: 0.0 is as bad as it can be, 1.0 is a perfect score. -- **No assumption is made on the cluster structure**: can be used - to compare clustering algorithms such as k-means which assumes isotropic - blob shapes with results of spectral clustering algorithms which can - find cluster with "folded" shapes. 
+  - Intuitive interpretation: clustering with bad V-measure can be
+    **qualitatively analyzed in terms of homogeneity and completeness** to
+    get a better feel for what 'kind' of mistakes are made by the assignment.
 
+  - **No assumption is made on the cluster structure**: can be used to compare
+    clustering algorithms such as k-means which assumes isotropic blob shapes
+    with results of spectral clustering algorithms which can find clusters with
+    "folded" shapes.
 
-Drawbacks
-~~~~~~~~~
+.. topic:: Drawbacks:
 
-- The previously introduced metrics are **not normalized with regards to
-  random labeling**: this means that depending on the number of samples,
-  clusters and ground truth classes, a completely random labeling will
-  not always yield the same values for homogeneity, completeness and
-  hence v-measure. In particular **random labeling won't yield zero
-  scores especially when the number of clusters is large**.
+  - The previously introduced metrics are **not normalized with regard to
+    random labeling**: this means that depending on the number of samples,
+    clusters and ground truth classes, a completely random labeling will not
+    always yield the same values for homogeneity, completeness and hence
+    v-measure. In particular **random labeling won't yield zero scores
+    especially when the number of clusters is large**.
 
-  This problem can safely be ignored when the number of samples is more
-  than a thousand and the number of clusters is less than 10. **For
-  smaller sample sizes or larger number of clusters it is safer to use
-  an adjusted index such as the Adjusted Rand Index (ARI)**.
+    This problem can safely be ignored when the number of samples is more than a
+    thousand and the number of clusters is less than 10. **For smaller sample
+    sizes or larger number of clusters it is safer to use an adjusted index such
+    as the Adjusted Rand Index (ARI)**.
 
-.. figure:: ../auto_examples/cluster/images/sphx_glr_plot_adjusted_for_chance_measures_001.png
-   :target: ../auto_examples/cluster/plot_adjusted_for_chance_measures.html
-   :align: center
-   :scale: 100
-
-- These metrics **require the knowledge of the ground truth classes** while
-  almost never available in practice or requires manual assignment by
-  human annotators (as in the supervised learning setting).
+  .. figure:: ../auto_examples/cluster/images/sphx_glr_plot_adjusted_for_chance_measures_001.png
+     :target: ../auto_examples/cluster/plot_adjusted_for_chance_measures.html
+     :align: center
+     :scale: 100
 
+  - These metrics **require the knowledge of the ground truth classes**, which
+    are almost never available in practice or require manual assignment by human
+    annotators (as in the supervised learning setting).
 
 .. topic:: Examples:
 
-  * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis of
-    the impact of the dataset size on the value of clustering measures
-    for random assignments.
+  * :ref:`sphx_glr_auto_examples_cluster_plot_adjusted_for_chance_measures.py`: Analysis
+    of the impact of the dataset size on the value of clustering measures for
+    random assignments.
 
-Mathematical formulation
-~~~~~~~~~~~~~~~~~~~~~~~~
+|details-start|
+**Mathematical formulation**
+|details-split|
 
 Homogeneity and completeness scores are formally given by:
 
@@ -1767,8 +1820,8 @@ Homogeneity and completeness scores are formally given by:
 
 ..
math:: c = 1 - \frac{H(K|C)}{H(K)}
 
-where :math:`H(C|K)` is the **conditional entropy of the classes given
-the cluster assignments** and is given by:
+where :math:`H(C|K)` is the **conditional entropy of the classes given the
+cluster assignments** and is given by:
 
 .. math:: H(C|K) = - \sum_{c=1}^{|C|} \sum_{k=1}^{|K|} \frac{n_{c,k}}{n}
          \cdot \log\left(\frac{n_{c,k}}{n_k}\right)
 
@@ -1777,24 +1830,26 @@ and :math:`H(C)` is the **entropy of the classes** and is given by:
 
 .. math:: H(C) = - \sum_{c=1}^{|C|} \frac{n_c}{n} \cdot \log\left(\frac{n_c}{n}\right)
 
-with :math:`n` the total number of samples, :math:`n_c` and :math:`n_k`
-the number of samples respectively belonging to class :math:`c` and
-cluster :math:`k`, and finally :math:`n_{c,k}` the number of samples
-from class :math:`c` assigned to cluster :math:`k`.
+with :math:`n` the total number of samples, :math:`n_c` and :math:`n_k` the
+number of samples respectively belonging to class :math:`c` and cluster
+:math:`k`, and finally :math:`n_{c,k}` the number of samples from class
+:math:`c` assigned to cluster :math:`k`.
 
 The **conditional entropy of clusters given class** :math:`H(K|C)` and the
 **entropy of clusters** :math:`H(K)` are defined in a symmetric manner.
 
-Rosenberg and Hirschberg further define **V-measure** as the **harmonic
-mean of homogeneity and completeness**:
+Rosenberg and Hirschberg further define **V-measure** as the **harmonic mean of
+homogeneity and completeness**:
 
 .. math:: v = 2 \cdot \frac{h \cdot c}{h + c}
 
+|details-end|
+
-.. topic:: References
+.. topic:: References:
 
-  * `V-Measure: A conditional entropy-based external cluster evaluation
-    measure `_
-    Andrew Rosenberg and Julia Hirschberg, 2007
+  * `V-Measure: A conditional entropy-based external cluster evaluation measure
+    `_ Andrew Rosenberg and Julia
+    Hirschberg, 2007
 
   .. [B2011] `Identification and Characterization of Events in Social Media
      `_, Hila
 
@@ -1851,41 +1906,43 @@ Bad (e.g. independent labelings) have zero scores::
 
   >>> metrics.fowlkes_mallows_score(labels_true, labels_pred)
   0.0
 
-Advantages
-~~~~~~~~~~
+.. topic:: Advantages:
 
-- **Random (uniform) label assignments have a FMI score close to 0.0**
-  for any value of ``n_clusters`` and ``n_samples`` (which is not the
-  case for raw Mutual Information or the V-measure for instance).
+  - **Random (uniform) label assignments have an FMI score close to 0.0** for any
+    value of ``n_clusters`` and ``n_samples`` (which is not the case for raw
+    Mutual Information or the V-measure for instance).
 
-- **Upper-bounded at 1**: Values close to zero indicate two label
-  assignments that are largely independent, while values close to one
-  indicate significant agreement. Further, values of exactly 0 indicate
-  **purely** independent label assignments and a FMI of exactly 1 indicates
-  that the two label assignments are equal (with or without permutation).
+  - **Upper-bounded at 1**: Values close to zero indicate two label assignments
+    that are largely independent, while values close to one indicate significant
+    agreement. Further, values of exactly 0 indicate **purely** independent
+    label assignments and an FMI of exactly 1 indicates that the two label
+    assignments are equal (with or without permutation).
 
-- **No assumption is made on the cluster structure**: can be used
-  to compare clustering algorithms such as k-means which assumes isotropic
-  blob shapes with results of spectral clustering algorithms which can
-  find cluster with "folded" shapes.
+  - **No assumption is made on the cluster structure**: can be used to compare
+    clustering algorithms such as k-means which assumes isotropic blob shapes
+    with results of spectral clustering algorithms which can find clusters with
+    "folded" shapes.
 
+.. topic:: Drawbacks:
 
-Drawbacks
-~~~~~~~~~
+  - Contrary to inertia, **FMI-based measures require the knowledge of the
+    ground truth classes**, which are almost never available in practice or
+    require manual assignment by human annotators (as in the supervised learning
+    setting).
 
-- Contrary to inertia, **FMI-based measures require the knowledge
-  of the ground truth classes** while almost never available in practice or
-  requires manual assignment by human annotators (as in the supervised learning
-  setting).
+|details-start|
+**References**
+|details-split|
 
-.. topic:: References
+* E. B. Fowlkes and C. L. Mallows, 1983. "A method for comparing two
+  hierarchical clusterings". Journal of the American Statistical
+  Association.
+  https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008
 
-  * E. B. Fowkles and C. L. Mallows, 1983. "A method for comparing two
-    hierarchical clusterings". Journal of the American Statistical Association.
-    https://www.tandfonline.com/doi/abs/10.1080/01621459.1983.10478008
+* `Wikipedia entry for the Fowlkes-Mallows Index
+  `_
 
-  * `Wikipedia entry for the Fowlkes-Mallows Index
-    `_
+|details-end|
 
 .. _silhouette_coefficient:
 
@@ -1929,35 +1986,38 @@ cluster analysis.
 
   >>> metrics.silhouette_score(X, labels, metric='euclidean')
   0.55...
 
-.. topic:: References
 
-  * Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the
-    Interpretation and Validation of Cluster Analysis"<10.1016/0377-0427(87)90125-7>`
-    . Computational and Applied Mathematics 20: 53–65.
+.. topic:: Advantages:
 
+  - The score is bounded between -1 for incorrect clustering and +1 for highly
+    dense clustering. Scores around zero indicate overlapping clusters.
 
-Advantages
-~~~~~~~~~~
+  - The score is higher when clusters are dense and well separated, which
+    relates to a standard concept of a cluster.
 
-- The score is bounded between -1 for incorrect clustering and +1 for highly
-  dense clustering. Scores around zero indicate overlapping clusters.
+.. topic:: Drawbacks:
 
-- The score is higher when clusters are dense and well separated, which relates
-  to a standard concept of a cluster.
+  - The Silhouette Coefficient is generally higher for convex clusters than
+    other concepts of clusters, such as density based clusters like those
+    obtained through DBSCAN.
 
+.. topic:: Examples:
 
-Drawbacks
-~~~~~~~~~
+  * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In
+    this example the silhouette analysis is used to choose an optimal value for
+    n_clusters.
 
-- The Silhouette Coefficient is generally higher for convex clusters than other
-  concepts of clusters, such as density based clusters like those obtained
-  through DBSCAN.
 
-.. topic:: Examples:
+|details-start|
+**References**
+|details-split|
 
-  * :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In this example
-    the silhouette analysis is used to choose an optimal value for n_clusters.
+* Peter J. Rousseeuw (1987). :doi:`"Silhouettes: a Graphical Aid to the
+  Interpretation and Validation of Cluster
+  Analysis"<10.1016/0377-0427(87)90125-7>` . Computational and Applied
+  Mathematics 20: 53–65.
 
+|details-end|
 
 .. _calinski_harabasz_index:
 
@@ -1989,28 +2049,28 @@ cluster analysis:
 
   >>> metrics.calinski_harabasz_score(X, labels)
   561.59...
-Advantages
-~~~~~~~~~~
 
-- The score is higher when clusters are dense and well separated, which relates
-  to a standard concept of a cluster.
+.. topic:: Advantages:
 
-- The score is fast to compute.
+  - The score is higher when clusters are dense and well separated, which
+    relates to a standard concept of a cluster.
 
+  - The score is fast to compute.
 
-Drawbacks
-~~~~~~~~~
+.. topic:: Drawbacks:
 
-- The Calinski-Harabasz index is generally higher for convex clusters than other
-  concepts of clusters, such as density based clusters like those obtained
-  through DBSCAN.
+  - The Calinski-Harabasz index is generally higher for convex clusters than
+    other concepts of clusters, such as density based clusters like those
+    obtained through DBSCAN.
 
-Mathematical formulation
-~~~~~~~~~~~~~~~~~~~~~~~~
+|details-start|
+**Mathematical formulation**
+|details-split|
 
 For a set of data :math:`E` of size :math:`n_E` which has been clustered into
 :math:`k` clusters, the Calinski-Harabasz score :math:`s` is defined as the
-ratio of the between-clusters dispersion mean and the within-cluster dispersion:
+ratio of the between-clusters dispersion mean and the within-cluster
+dispersion:
 
 .. math::
   s = \frac{\mathrm{tr}(B_k)}{\mathrm{tr}(W_k)} \times \frac{n_E - k}{k - 1}
 
@@ -2023,17 +2083,22 @@ matrix defined by:
 
 .. math:: B_k = \sum_{q=1}^k n_q (c_q - c_E) (c_q - c_E)^T
 
-with :math:`C_q` the set of points in cluster :math:`q`, :math:`c_q` the center
-of cluster :math:`q`, :math:`c_E` the center of :math:`E`, and :math:`n_q` the
-number of points in cluster :math:`q`.
+with :math:`C_q` the set of points in cluster :math:`q`, :math:`c_q` the
+center of cluster :math:`q`, :math:`c_E` the center of :math:`E`, and
+:math:`n_q` the number of points in cluster :math:`q`.
 
-.. topic:: References
+|details-end|
 
-  * Caliński, T., & Harabasz, J. (1974).
-    `"A Dendrite Method for Cluster Analysis"
-    `_.
-    :doi:`Communications in Statistics-theory and Methods 3: 1-27 <10.1080/03610927408827101>`.
+|details-start|
+**References**
+|details-split|
 
+* Caliński, T., & Harabasz, J. (1974). `"A Dendrite Method for Cluster Analysis"
+  `_.
+  :doi:`Communications in Statistics-theory and Methods 3: 1-27
+  <10.1080/03610927408827101>`.
+
+|details-end|
 
 .. _davies-bouldin_index:
 
@@ -2066,23 +2131,24 @@ cluster analysis as follows:
 
    0.666...
 
 
-Advantages
-~~~~~~~~~~
+.. topic:: Advantages:
+
+  - The computation of Davies-Bouldin is simpler than that of Silhouette scores.
+  - The index is solely based on quantities and features inherent to the dataset
+    as its computation only uses point-wise distances.
 
-- The computation of Davies-Bouldin is simpler than that of Silhouette scores.
-- The index is solely based on quantities and features inherent to the dataset
-  as its computation only uses point-wise distances.
 
+.. topic:: Drawbacks:
 
-Drawbacks
-~~~~~~~~~
+  - The Davies-Bouldin index is generally higher for convex clusters than other
+    concepts of clusters, such as density based clusters like those obtained
+    from DBSCAN.
+  - The usage of centroid distance limits the distance metric to Euclidean
+    space.
 
-- The Davies-Boulding index is generally higher for convex clusters than other
-  concepts of clusters, such as density based clusters like those obtained from
-  DBSCAN.
-- The usage of centroid distance limits the distance metric to Euclidean space.
-Mathematical formulation
-~~~~~~~~~~~~~~~~~~~~~~~~
+|details-start|
+**Mathematical formulation**
+|details-split|
 
 The index is defined as the average similarity between each cluster :math:`C_i`
 for :math:`i=1, ..., k` and its most similar one :math:`C_j`. In the context of
@@ -2090,34 +2156,38 @@ this index, similarity is defined as a measure :math:`R_{ij}` that trades off:
 
 - :math:`s_i`, the average distance between each point of cluster :math:`i` and
   the centroid of that cluster -- also know as cluster diameter.
-- :math:`d_{ij}`, the distance between cluster centroids :math:`i` and :math:`j`.
+- :math:`d_{ij}`, the distance between cluster centroids :math:`i` and
+  :math:`j`.
 
 A simple choice to construct :math:`R_{ij}` so that it is nonnegative and
 symmetric is:
 
 .. math::
-   R_{ij} = \frac{s_i + s_j}{d_{ij}}
+  R_{ij} = \frac{s_i + s_j}{d_{ij}}
 
 Then the Davies-Bouldin index is defined as:
 
 .. math::
-   DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij}
+  DB = \frac{1}{k} \sum_{i=1}^k \max_{i \neq j} R_{ij}
 
+|details-end|
 
-.. topic:: References
+|details-start|
+**References**
+|details-split|
 
-  * Davies, David L.; Bouldin, Donald W. (1979).
-    :doi:`"A Cluster Separation Measure" <10.1109/TPAMI.1979.4766909>`
-    IEEE Transactions on Pattern Analysis and Machine Intelligence.
-    PAMI-1 (2): 224-227.
+* Davies, David L.; Bouldin, Donald W. (1979). :doi:`"A Cluster Separation
+  Measure" <10.1109/TPAMI.1979.4766909>` IEEE Transactions on Pattern Analysis
+  and Machine Intelligence. PAMI-1 (2): 224-227.
 
-  * Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001).
-    :doi:`"On Clustering Validation Techniques" <10.1023/A:1012801612483>`
-    Journal of Intelligent Information Systems, 17(2-3), 107-145.
+* Halkidi, Maria; Batistakis, Yannis; Vazirgiannis, Michalis (2001). :doi:`"On
+  Clustering Validation Techniques" <10.1023/A:1012801612483>` Journal of
+  Intelligent Information Systems, 17(2-3), 107-145.
 
-  * `Wikipedia entry for Davies-Bouldin index
-    `_.
+* `Wikipedia entry for Davies-Bouldin index
+  `_.
 
+|details-end|
 
 .. _contingency_matrix:
 
@@ -2150,30 +2220,32 @@ contingency matrix where the order of rows and columns correspond to a list of
 classes.
 
-Advantages
-~~~~~~~~~~
+.. topic:: Advantages:
 
-- Allows to examine the spread of each true cluster across predicted
-  clusters and vice versa.
+  - Allows one to examine the spread of each true cluster across predicted
+    clusters and vice versa.
 
-- The contingency table calculated is typically utilized in the calculation
-  of a similarity statistic (like the others listed in this document) between
-  the two clusterings.
+  - The contingency table calculated is typically utilized in the calculation of
+    a similarity statistic (like the others listed in this document) between the
+    two clusterings.
 
-Drawbacks
-~~~~~~~~~
+.. topic:: Drawbacks:
 
-- Contingency matrix is easy to interpret for a small number of clusters, but
-  becomes very hard to interpret for a large number of clusters.
+  - Contingency matrix is easy to interpret for a small number of clusters, but
+    becomes very hard to interpret for a large number of clusters.
 
-- It doesn't give a single metric to use as an objective for clustering
-  optimisation.
+  - It doesn't give a single metric to use as an objective for clustering
+    optimisation.
 
-.. topic:: References
+|details-start|
+**References**
+|details-split|
 
-  * `Wikipedia entry for contingency matrix
-    `_
+* `Wikipedia entry for contingency matrix
+  `_
+
+|details-end|
 
 ..
_pair_confusion_matrix: @@ -2251,7 +2323,11 @@ diagonal entries:: array([[ 0, 0], [12, 0]]) -.. topic:: References +|details-start| +**References** +|details-split| + + * :doi:`"Comparing Partitions" <10.1007/BF01908075>` L. Hubert and P. Arabie, + Journal of Classification 1985 - * :doi:`"Comparing Partitions" <10.1007/BF01908075>` - L. Hubert and P. Arabie, Journal of Classification 1985 +|details-end| From cac6d4ee913a9b71130c6bed7a919a5d4212072e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= Date: Tue, 27 Feb 2024 16:58:05 +0100 Subject: [PATCH 15/23] MAINT cleanup utils.__init__: move masking tools into utils._mask (#28515) Co-authored-by: Thomas J. Fan --- sklearn/ensemble/_bagging.py | 2 +- sklearn/linear_model/_huber.py | 2 +- sklearn/utils/__init__.py | 117 +-------------------------------- sklearn/utils/_mask.py | 115 ++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 117 deletions(-) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 878e2ea809c01..e0ff0b9509c3b 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -21,8 +21,8 @@ _safe_indexing, check_random_state, column_or_1d, - indices_to_mask, ) +from ..utils._mask import indices_to_mask from ..utils._param_validation import HasMethods, Interval, RealNotInt from ..utils._tags import _safe_tags from ..utils.metadata_routing import ( diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py index 554f693061116..4c60a2de8cb86 100644 --- a/sklearn/linear_model/_huber.py +++ b/sklearn/linear_model/_huber.py @@ -7,7 +7,7 @@ from scipy import optimize from ..base import BaseEstimator, RegressorMixin, _fit_context -from ..utils import axis0_safe_slice +from ..utils._mask import axis0_safe_slice from ..utils._param_validation import Interval from ..utils.extmath import safe_sparse_dot from ..utils.optimize import _check_optimize_result diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 9a2481393271a..f44c0ca078777 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -19,6 +19,7 @@ from ._bunch import Bunch from ._chunking import gen_batches, gen_even_slices from ._estimator_html_repr import estimator_html_repr +from ._mask import safe_mask from ._param_validation import Interval, validate_params from .class_weight import compute_class_weight, compute_sample_weight from .deprecation import deprecated @@ -64,7 +65,6 @@ "check_scalar", "indexable", "check_symmetric", - "indices_to_mask", "deprecated", "parallel_backend", "register_parallel_backend", @@ -76,6 +76,7 @@ "Bunch", "metadata_routing", "safe_sqr", + "safe_mask", "gen_batches", "gen_even_slices", ] @@ -85,88 +86,6 @@ _IS_WASM = platform.machine() in ["wasm32", "wasm64"] -@validate_params( - { - "X": ["array-like", "sparse matrix"], - "mask": ["array-like"], - }, - prefer_skip_nested_validation=True, -) -def safe_mask(X, mask): - """Return a mask which is safe to use on X. - - Parameters - ---------- - X : {array-like, sparse matrix} - Data on which to apply mask. - - mask : array-like - Mask to be used on X. - - Returns - ------- - mask : ndarray - Array that is safe to use on X. 
- - Examples - -------- - >>> from sklearn.utils import safe_mask - >>> from scipy.sparse import csr_matrix - >>> data = csr_matrix([[1], [2], [3], [4], [5]]) - >>> condition = [False, True, True, False, True] - >>> mask = safe_mask(data, condition) - >>> data[mask].toarray() - array([[2], - [3], - [5]]) - """ - mask = np.asarray(mask) - if np.issubdtype(mask.dtype, np.signedinteger): - return mask - - if hasattr(X, "toarray"): - ind = np.arange(mask.shape[0]) - mask = ind[mask] - return mask - - -def axis0_safe_slice(X, mask, len_mask): - """Return a mask which is safer to use on X than safe_mask. - - This mask is safer than safe_mask since it returns an - empty array, when a sparse matrix is sliced with a boolean mask - with all False, instead of raising an unhelpful error in older - versions of SciPy. - - See: https://github.com/scipy/scipy/issues/5361 - - Also note that we can avoid doing the dot product by checking if - the len_mask is not zero in _huber_loss_and_gradient but this - is not going to be the bottleneck, since the number of outliers - and non_outliers are typically non-zero and it makes the code - tougher to follow. - - Parameters - ---------- - X : {array-like, sparse matrix} - Data on which to apply mask. - - mask : ndarray - Mask to be used on X. - - len_mask : int - The length of the mask. - - Returns - ------- - mask : ndarray - Array that is safe to use on X. - """ - if len_mask != 0: - return X[safe_mask(X, mask), :] - return np.zeros(shape=(0, X.shape[1])) - - def _array_indexing(array, key, key_dtype, axis): """Index an array or scipy.sparse consistently across NumPy version.""" if issparse(array) and key_dtype == "bool": @@ -806,38 +725,6 @@ def _to_object_array(sequence): return out -def indices_to_mask(indices, mask_length): - """Convert list of indices to boolean mask. - - Parameters - ---------- - indices : list-like - List of integers treated as indices. - mask_length : int - Length of boolean mask to be generated. - This parameter must be greater than max(indices). - - Returns - ------- - mask : 1d boolean nd-array - Boolean array that is True where indices are present, else False. - - Examples - -------- - >>> from sklearn.utils import indices_to_mask - >>> indices = [1, 2 , 3, 4] - >>> indices_to_mask(indices, 5) - array([False, True, True, True, True]) - """ - if mask_length <= np.max(indices): - raise ValueError("mask_length must be greater than max(indices)") - - mask = np.zeros(mask_length, dtype=bool) - mask[indices] = True - - return mask - - def _message_with_time(source, message, time): """Create one line message for logging purposes. diff --git a/sklearn/utils/_mask.py b/sklearn/utils/_mask.py index 15ea55e7520e1..0a66dc5a20a81 100644 --- a/sklearn/utils/_mask.py +++ b/sklearn/utils/_mask.py @@ -4,6 +4,7 @@ from scipy import sparse as sp from ._missing import is_scalar_nan +from ._param_validation import validate_params from .fixes import _object_dtype_isnan @@ -61,3 +62,117 @@ def _get_mask(X, value_to_mask): ) return Xt_sparse + + +@validate_params( + { + "X": ["array-like", "sparse matrix"], + "mask": ["array-like"], + }, + prefer_skip_nested_validation=True, +) +def safe_mask(X, mask): + """Return a mask which is safe to use on X. + + Parameters + ---------- + X : {array-like, sparse matrix} + Data on which to apply mask. + + mask : array-like + Mask to be used on X. + + Returns + ------- + mask : ndarray + Array that is safe to use on X. 
+
+    Examples
+    --------
+    >>> from sklearn.utils import safe_mask
+    >>> from scipy.sparse import csr_matrix
+    >>> data = csr_matrix([[1], [2], [3], [4], [5]])
+    >>> condition = [False, True, True, False, True]
+    >>> mask = safe_mask(data, condition)
+    >>> data[mask].toarray()
+    array([[2],
+           [3],
+           [5]])
+    """
+    mask = np.asarray(mask)
+    if np.issubdtype(mask.dtype, np.signedinteger):
+        return mask
+
+    if hasattr(X, "toarray"):
+        ind = np.arange(mask.shape[0])
+        mask = ind[mask]
+    return mask
+
+
+def axis0_safe_slice(X, mask, len_mask):
+    """Slice X along its first axis, more safely than with safe_mask alone.
+
+    This function is safer than slicing with safe_mask directly since it
+    returns an empty array, when a sparse matrix is sliced with a boolean mask
+    with all False, instead of raising an unhelpful error in older
+    versions of SciPy.
+
+    See: https://github.com/scipy/scipy/issues/5361
+
+    Also note that we can avoid doing the dot product by checking if
+    the len_mask is not zero in _huber_loss_and_gradient but this
+    is not going to be the bottleneck, since the number of outliers
+    and non_outliers are typically non-zero and it makes the code
+    tougher to follow.
+
+    Parameters
+    ----------
+    X : {array-like, sparse matrix}
+        Data on which to apply mask.
+
+    mask : ndarray
+        Mask to be used on X.
+
+    len_mask : int
+        The length of the mask.
+
+    Returns
+    -------
+    X_sliced : ndarray
+        The rows of X selected by the mask (an empty array if len_mask is 0).
+    """
+    if len_mask != 0:
+        return X[safe_mask(X, mask), :]
+    return np.zeros(shape=(0, X.shape[1]))
+
+
+def indices_to_mask(indices, mask_length):
+    """Convert list of indices to boolean mask.
+
+    Parameters
+    ----------
+    indices : list-like
+        List of integers treated as indices.
+    mask_length : int
+        Length of boolean mask to be generated.
+        This parameter must be greater than max(indices).
+
+    Returns
+    -------
+    mask : 1d boolean nd-array
+        Boolean array that is True where indices are present, else False.
+
+    Examples
+    --------
+    >>> from sklearn.utils._mask import indices_to_mask
+    >>> indices = [1, 2, 3, 4]
+    >>> indices_to_mask(indices, 5)
+    array([False,  True,  True,  True,  True])
+    """
+    if mask_length <= np.max(indices):
+        raise ValueError("mask_length must be greater than max(indices)")
+
+    mask = np.zeros(mask_length, dtype=bool)
+    mask[indices] = True
+
+    return mask

From 9dee432bb8b5bb3dc4c6c60857ce2b37f33875e9 Mon Sep 17 00:00:00 2001
From: Thanh Lam DANG <70220760+lamdang2k@users.noreply.github.com>
Date: Tue, 27 Feb 2024 21:18:00 +0100
Subject: [PATCH 16/23] DOC Add a note on the min_samples parameter of HDBSCAN
 (#28465)

---
 sklearn/cluster/_hdbscan/hdbscan.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py
index fc51f10cffba0..380448f1f8589 100644
--- a/sklearn/cluster/_hdbscan/hdbscan.py
+++ b/sklearn/cluster/_hdbscan/hdbscan.py
@@ -594,6 +594,14 @@ class HDBSCAN(ClusterMixin, BaseEstimator):
     OPTICS : Ordering Points To Identify the Clustering Structure.
     Birch : Memory-efficient, online-learning algorithm.
 
+    Notes
+    -----
+    The `min_samples` parameter includes the point itself, whereas the implementation in
+    `scikit-learn-contrib/hdbscan <https://github.com/scikit-learn-contrib/hdbscan>`_
+    does not. To get the same results in both versions, the value of `min_samples` here
+    must be 1 greater than the value used in `scikit-learn-contrib/hdbscan
+    <https://github.com/scikit-learn-contrib/hdbscan>`_.
+ References ---------- From 38b39a403179dd67b325dc2fe3da849feda7f557 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 28 Feb 2024 20:05:29 +1100 Subject: [PATCH 17/23] DOC Fix `GroupKFold` docstring (#28540) --- sklearn/model_selection/_split.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 3f88285117d6b..a1c5194d1dbef 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -536,7 +536,7 @@ class GroupKFold(GroupsConsumerMixin, _BaseKFold): number of distinct groups has to be at least equal to the number of folds). The folds are approximately balanced in the sense that the number of - distinct groups is approximately the same in each fold. + samples is approximately the same in each test fold. Read more in the :ref:`User Guide `. From a45d10635878d5124da5bb3b31744678ada324da Mon Sep 17 00:00:00 2001 From: SarahRemus <60743257+SarahRemus@users.noreply.github.com> Date: Wed, 28 Feb 2024 13:52:15 +0100 Subject: [PATCH 18/23] MAINT Parameters validation for decomposition.dict_learning_online (#25010) Co-authored-by: ge72mum Co-authored-by: jeremie du boisberranger --- sklearn/decomposition/_dict_learning.py | 11 ++++++++++- sklearn/tests/test_public_functions.py | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 51350aa5e05bd..177d6960033da 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -664,6 +664,15 @@ def _dict_learning( return code, dictionary, errors +@validate_params( + { + "X": ["array-like"], + "return_code": ["boolean"], + "method": [StrOptions({"cd", "lars"})], + "method_max_iter": [Interval(Integral, 0, None, closed="left")], + }, + prefer_skip_nested_validation=False, +) def dict_learning_online( X, n_components=2, @@ -704,7 +713,7 @@ def dict_learning_online( Parameters ---------- - X : ndarray of shape (n_samples, n_features) + X : array-like of shape (n_samples, n_features) Data matrix. 
n_components : int or None, default=2 diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 22e70b39e1c6f..d693d66799dbf 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -355,6 +355,10 @@ def test_function_param_validation(func_module): ("sklearn.covariance.ledoit_wolf", "sklearn.covariance.LedoitWolf"), ("sklearn.covariance.oas", "sklearn.covariance.OAS"), ("sklearn.decomposition.dict_learning", "sklearn.decomposition.DictionaryLearning"), + ( + "sklearn.decomposition.dict_learning_online", + "sklearn.decomposition.MiniBatchDictionaryLearning", + ), ("sklearn.decomposition.fastica", "sklearn.decomposition.FastICA"), ("sklearn.decomposition.non_negative_factorization", "sklearn.decomposition.NMF"), ("sklearn.preprocessing.maxabs_scale", "sklearn.preprocessing.MaxAbsScaler"), From 24452ef42f17b31487f545c8b2ba977177468511 Mon Sep 17 00:00:00 2001 From: crispinlogan <36704697+crispinlogan@users.noreply.github.com> Date: Wed, 28 Feb 2024 13:01:20 +0000 Subject: [PATCH 19/23] MAINT Add parameter validation to locally_linear_embedding (#25581) Co-authored-by: jeremie du boisberranger --- sklearn/manifold/_locally_linear.py | 275 ++++++++++++++----------- sklearn/tests/test_public_functions.py | 3 +- 2 files changed, 159 insertions(+), 119 deletions(-) diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index 41d0c233b8f76..18f7f504a1e31 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -21,7 +21,7 @@ from ..neighbors import NearestNeighbors from ..utils import check_array, check_random_state from ..utils._arpack import _init_arpack_v0 -from ..utils._param_validation import Interval, StrOptions +from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils.extmath import stable_cumsum from ..utils.validation import FLOAT_DTYPES, check_is_fitted @@ -198,7 +198,7 @@ def null_space( raise ValueError("Unrecognized eigen_solver '%s'" % eigen_solver) -def locally_linear_embedding( +def _locally_linear_embedding( X, *, n_neighbors, @@ -213,118 +213,6 @@ def locally_linear_embedding( random_state=None, n_jobs=None, ): - """Perform a Locally Linear Embedding analysis on the data. - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - X : {array-like, NearestNeighbors} - Sample data, shape = (n_samples, n_features), in the form of a - numpy array or a NearestNeighbors object. - - n_neighbors : int - Number of neighbors to consider for each point. - - n_components : int - Number of coordinates for the manifold. - - reg : float, default=1e-3 - Regularization constant, multiplies the trace of the local covariance - matrix of the distances. - - eigen_solver : {'auto', 'arpack', 'dense'}, default='auto' - auto : algorithm will attempt to choose the best method for input data - - arpack : use arnoldi iteration in shift-invert mode. - For this method, M may be a dense matrix, sparse matrix, - or general linear operator. - Warning: ARPACK can be unstable for some problems. It is - best to try several random seeds in order to check results. - - dense : use standard dense matrix operations for the eigenvalue - decomposition. For this method, M must be an array - or matrix type. This method should be avoided for - large problems. - - tol : float, default=1e-6 - Tolerance for 'arpack' method - Not used if eigen_solver=='dense'. 
- - max_iter : int, default=100 - Maximum number of iterations for the arpack solver. - - method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard' - standard : use the standard locally linear embedding algorithm. - see reference [1]_ - hessian : use the Hessian eigenmap method. This method requires - n_neighbors > n_components * (1 + (n_components + 1) / 2. - see reference [2]_ - modified : use the modified locally linear embedding algorithm. - see reference [3]_ - ltsa : use local tangent space alignment algorithm - see reference [4]_ - - hessian_tol : float, default=1e-4 - Tolerance for Hessian eigenmapping method. - Only used if method == 'hessian'. - - modified_tol : float, default=1e-12 - Tolerance for modified LLE method. - Only used if method == 'modified'. - - random_state : int, RandomState instance, default=None - Determines the random number generator when ``solver`` == 'arpack'. - Pass an int for reproducible results across multiple function calls. - See :term:`Glossary `. - - n_jobs : int or None, default=None - The number of parallel jobs to run for neighbors search. - ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. - ``-1`` means using all processors. See :term:`Glossary ` - for more details. - - Returns - ------- - Y : array-like, shape [n_samples, n_components] - Embedding vectors. - - squared_error : float - Reconstruction error for the embedding vectors. Equivalent to - ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights. - - References - ---------- - - .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction - by locally linear embedding. Science 290:2323 (2000). - .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally - linear embedding techniques for high-dimensional data. - Proc Natl Acad Sci U S A. 100:5591 (2003). - .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear - Embedding Using Multiple Weights. - `_ - .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear - dimensionality reduction via tangent space alignment. - Journal of Shanghai Univ. 
8:406 (2004)
-
- Examples
- --------
- >>> from sklearn.datasets import load_digits
- >>> from sklearn.manifold import locally_linear_embedding
- >>> X, _ = load_digits(return_X_y=True)
- >>> X.shape
- (1797, 64)
- >>> embedding, _ = locally_linear_embedding(X[:100],n_neighbors=5, n_components=2)
- >>> embedding.shape
- (100, 2)
- """
- if eigen_solver not in ("auto", "arpack", "dense"):
- raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)
-
- if method not in ("standard", "hessian", "modified", "ltsa"):
- raise ValueError("unrecognized method '%s'" % method)
-
nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
nbrs.fit(X)
X = nbrs._fit_X
@@ -341,9 +229,6 @@ def locally_linear_embedding(
% (N, n_neighbors)
)

- if n_neighbors <= 0:
- raise ValueError("n_neighbors must be positive")
-
M_sparse = eigen_solver != "dense"

if method == "standard":
@@ -561,6 +446,160 @@ def locally_linear_embedding(
)


+@validate_params(
+ {
+ "X": ["array-like", NearestNeighbors],
+ "n_neighbors": [Interval(Integral, 1, None, closed="left")],
+ "n_components": [Interval(Integral, 1, None, closed="left")],
+ "reg": [Interval(Real, 0, None, closed="left")],
+ "eigen_solver": [StrOptions({"auto", "arpack", "dense"})],
+ "tol": [Interval(Real, 0, None, closed="left")],
+ "max_iter": [Interval(Integral, 1, None, closed="left")],
+ "method": [StrOptions({"standard", "hessian", "modified", "ltsa"})],
+ "hessian_tol": [Interval(Real, 0, None, closed="left")],
+ "modified_tol": [Interval(Real, 0, None, closed="left")],
+ "random_state": ["random_state"],
+ "n_jobs": [None, Integral],
+ },
+ prefer_skip_nested_validation=True,
+)
+def locally_linear_embedding(
+ X,
+ *,
+ n_neighbors,
+ n_components,
+ reg=1e-3,
+ eigen_solver="auto",
+ tol=1e-6,
+ max_iter=100,
+ method="standard",
+ hessian_tol=1e-4,
+ modified_tol=1e-12,
+ random_state=None,
+ n_jobs=None,
+):
+ """Perform a Locally Linear Embedding analysis on the data.
+
+ Read more in the :ref:`User Guide <locally_linear_embedding>`.
+
+ Parameters
+ ----------
+ X : {array-like, NearestNeighbors}
+ Sample data, shape = (n_samples, n_features), in the form of a
+ numpy array or a NearestNeighbors object.
+
+ n_neighbors : int
+ Number of neighbors to consider for each point.
+
+ n_components : int
+ Number of coordinates for the manifold.
+
+ reg : float, default=1e-3
+ Regularization constant, multiplies the trace of the local covariance
+ matrix of the distances.
+
+ eigen_solver : {'auto', 'arpack', 'dense'}, default='auto'
+ auto : algorithm will attempt to choose the best method for input data
+
+ arpack : use arnoldi iteration in shift-invert mode.
+ For this method, M may be a dense matrix, sparse matrix,
+ or general linear operator.
+ Warning: ARPACK can be unstable for some problems. It is
+ best to try several random seeds in order to check results.
+
+ dense : use standard dense matrix operations for the eigenvalue
+ decomposition. For this method, M must be an array
+ or matrix type. This method should be avoided for
+ large problems.
+
+ tol : float, default=1e-6
+ Tolerance for 'arpack' method
+ Not used if eigen_solver=='dense'.
+
+ max_iter : int, default=100
+ Maximum number of iterations for the arpack solver.
+
+ method : {'standard', 'hessian', 'modified', 'ltsa'}, default='standard'
+ standard : use the standard locally linear embedding algorithm.
+ see reference [1]_
+ hessian : use the Hessian eigenmap method. This method requires
+ n_neighbors > n_components * (1 + (n_components + 1) / 2).
+ see reference [2]_ + modified : use the modified locally linear embedding algorithm. + see reference [3]_ + ltsa : use local tangent space alignment algorithm + see reference [4]_ + + hessian_tol : float, default=1e-4 + Tolerance for Hessian eigenmapping method. + Only used if method == 'hessian'. + + modified_tol : float, default=1e-12 + Tolerance for modified LLE method. + Only used if method == 'modified'. + + random_state : int, RandomState instance, default=None + Determines the random number generator when ``solver`` == 'arpack'. + Pass an int for reproducible results across multiple function calls. + See :term:`Glossary `. + + n_jobs : int or None, default=None + The number of parallel jobs to run for neighbors search. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See :term:`Glossary ` + for more details. + + Returns + ------- + Y : ndarray of shape (n_samples, n_components) + Embedding vectors. + + squared_error : float + Reconstruction error for the embedding vectors. Equivalent to + ``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights. + + References + ---------- + + .. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction + by locally linear embedding. Science 290:2323 (2000). + .. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally + linear embedding techniques for high-dimensional data. + Proc Natl Acad Sci U S A. 100:5591 (2003). + .. [3] `Zhang, Z. & Wang, J. MLLE: Modified Locally Linear + Embedding Using Multiple Weights. + `_ + .. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear + dimensionality reduction via tangent space alignment. + Journal of Shanghai Univ. 8:406 (2004) + + Examples + -------- + >>> from sklearn.datasets import load_digits + >>> from sklearn.manifold import locally_linear_embedding + >>> X, _ = load_digits(return_X_y=True) + >>> X.shape + (1797, 64) + >>> embedding, _ = locally_linear_embedding(X[:100],n_neighbors=5, n_components=2) + >>> embedding.shape + (100, 2) + """ + return _locally_linear_embedding( + X=X, + n_neighbors=n_neighbors, + n_components=n_components, + reg=reg, + eigen_solver=eigen_solver, + tol=tol, + max_iter=max_iter, + method=method, + hessian_tol=hessian_tol, + modified_tol=modified_tol, + random_state=random_state, + n_jobs=n_jobs, + ) + + class LocallyLinearEmbedding( ClassNamePrefixFeaturesOutMixin, TransformerMixin, @@ -753,7 +792,7 @@ def _fit_transform(self, X): random_state = check_random_state(self.random_state) X = self._validate_data(X, dtype=float) self.nbrs_.fit(X) - self.embedding_, self.reconstruction_error_ = locally_linear_embedding( + self.embedding_, self.reconstruction_error_ = _locally_linear_embedding( X=self.nbrs_, n_neighbors=self.n_neighbors, n_components=self.n_components, diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index d693d66799dbf..76f8fa6921c32 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -207,9 +207,10 @@ def _check_function_param_validation( "sklearn.linear_model.orthogonal_mp", "sklearn.linear_model.orthogonal_mp_gram", "sklearn.linear_model.ridge_regression", + "sklearn.manifold.locally_linear_embedding", + "sklearn.manifold.smacof", "sklearn.manifold.trustworthiness", "sklearn.metrics.accuracy_score", - "sklearn.manifold.smacof", "sklearn.metrics.auc", "sklearn.metrics.average_precision_score", "sklearn.metrics.balanced_accuracy_score", From a25e3efabe7964e45dce60af289660761ed6b91e Mon Sep 17 
00:00:00 2001 From: crispinlogan <36704697+crispinlogan@users.noreply.github.com> Date: Wed, 28 Feb 2024 14:00:09 +0000 Subject: [PATCH 20/23] MAINT Parameters validation for `sklearn.manifold.spectral_embedding` (#25579) Co-authored-by: jeremie du boisberranger --- sklearn/cluster/_spectral.py | 4 +-- sklearn/manifold/_spectral_embedding.py | 46 ++++++++++++++++++++----- sklearn/tests/test_public_functions.py | 1 + 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index 69bc7bc87d0ca..d323a6b8afd03 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -14,7 +14,7 @@ from scipy.sparse import csc_matrix from ..base import BaseEstimator, ClusterMixin, _fit_context -from ..manifold import spectral_embedding +from ..manifold._spectral_embedding import _spectral_embedding from ..metrics.pairwise import KERNEL_PARAMS, pairwise_kernels from ..neighbors import NearestNeighbors, kneighbors_graph from ..utils import as_float_array, check_random_state @@ -741,7 +741,7 @@ def fit(self, X, y=None): # The first eigenvector is constant only for fully connected graphs # and should be kept for spectral clustering (drop_first = False) # See spectral_embedding documentation. - maps = spectral_embedding( + maps = _spectral_embedding( self.affinity_matrix_, n_components=n_components, eigen_solver=self.eigen_solver, diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index a2839954c117a..f1707fad1c950 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -23,7 +23,7 @@ check_symmetric, ) from ..utils._arpack import _init_arpack_v0 -from ..utils._param_validation import Interval, StrOptions +from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils.extmath import _deterministic_vector_sign_flip from ..utils.fixes import laplacian as csgraph_laplacian from ..utils.fixes import parse_version, sp_version @@ -152,6 +152,18 @@ def _set_diag(laplacian, value, norm_laplacian): return laplacian +@validate_params( + { + "adjacency": ["array-like", "sparse matrix"], + "n_components": [Interval(Integral, 1, None, closed="left")], + "eigen_solver": [StrOptions({"arpack", "lobpcg", "amg"}), None], + "random_state": ["random_state"], + "eigen_tol": [Interval(Real, 0, None, closed="left"), StrOptions({"auto"})], + "norm_laplacian": ["boolean"], + "drop_first": ["boolean"], + }, + prefer_skip_nested_validation=True, +) def spectral_embedding( adjacency, *, @@ -272,6 +284,29 @@ def spectral_embedding( >>> embedding.shape (100, 2) """ + random_state = check_random_state(random_state) + + return _spectral_embedding( + adjacency, + n_components=n_components, + eigen_solver=eigen_solver, + random_state=random_state, + eigen_tol=eigen_tol, + norm_laplacian=norm_laplacian, + drop_first=drop_first, + ) + + +def _spectral_embedding( + adjacency, + *, + n_components=8, + eigen_solver=None, + random_state=None, + eigen_tol="auto", + norm_laplacian=True, + drop_first=True, +): adjacency = check_symmetric(adjacency) if eigen_solver == "amg": @@ -284,13 +319,6 @@ def spectral_embedding( if eigen_solver is None: eigen_solver = "arpack" - elif eigen_solver not in ("arpack", "lobpcg", "amg"): - raise ValueError( - "Unknown value for eigen_solver: '%s'." 
- "Should be 'amg', 'arpack', or 'lobpcg'" % eigen_solver - ) - - random_state = check_random_state(random_state) n_nodes = adjacency.shape[0] # Whether to drop the first eigenvector @@ -714,7 +742,7 @@ def fit(self, X, y=None): random_state = check_random_state(self.random_state) affinity_matrix = self._get_affinity_matrix(X) - self.embedding_ = spectral_embedding( + self.embedding_ = _spectral_embedding( affinity_matrix, n_components=self.n_components, eigen_solver=self.eigen_solver, diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 76f8fa6921c32..41629aa189941 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -209,6 +209,7 @@ def _check_function_param_validation( "sklearn.linear_model.ridge_regression", "sklearn.manifold.locally_linear_embedding", "sklearn.manifold.smacof", + "sklearn.manifold.spectral_embedding", "sklearn.manifold.trustworthiness", "sklearn.metrics.accuracy_score", "sklearn.metrics.auc", From 783e98fa31e9c5e75bd7be5fded8561302fa9dbf Mon Sep 17 00:00:00 2001 From: Brendan Lu Date: Thu, 29 Feb 2024 02:35:45 +1100 Subject: [PATCH 21/23] FIX correct NeighborhoodComponentsAnalysis (NCA) `_n_features_out` value (#28306) --- doc/whats_new/v1.4.rst | 10 ++++++++++ sklearn/neighbors/_nca.py | 6 +++++- sklearn/neighbors/tests/test_nca.py | 20 ++++++++++++++++---- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 7092c53da1a27..46af56856a6b4 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -27,6 +27,16 @@ Metadata Routing attributes. :pr:`28435` by `Adrin Jalali`_. +Changelog +--------- + +:mod:`sklearn.neighbors` +........................ + +- |Fix| Fixes :class:`neighbors.NeighborhoodComponentsAnalysis` such that + `get_feature_names_out` returns the correct number of feature names. + :pr:`28306` by :user:`Brendan Lu `. + .. _changes_1_4_1: Version 1.4.1 diff --git a/sklearn/neighbors/_nca.py b/sklearn/neighbors/_nca.py index d302aef0dc0a2..b304c3fb9792f 100644 --- a/sklearn/neighbors/_nca.py +++ b/sklearn/neighbors/_nca.py @@ -323,7 +323,6 @@ def fit(self, X, y): # Reshape the solution found by the optimizer self.components_ = opt_result.x.reshape(-1, X.shape[1]) - self._n_features_out = self.components_.shape[1] # Stop timer t_train = time.time() - t_train @@ -523,3 +522,8 @@ def _loss_grad_lbfgs(self, transformation, X, same_class_mask, sign=1.0): def _more_tags(self): return {"requires_y": True} + + @property + def _n_features_out(self): + """Number of transformed output features.""" + return self.components_.shape[0] diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index 7dedd97ff423b..a3eb5a8c6de17 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -531,18 +531,30 @@ def test_parameters_valid_types(param, value): nca.fit(X, y) -def test_nca_feature_names_out(): - """Check `get_feature_names_out` for `NeighborhoodComponentsAnalysis`.""" +@pytest.mark.parametrize("n_components", [None, 2]) +def test_nca_feature_names_out(n_components): + """Check `get_feature_names_out` for `NeighborhoodComponentsAnalysis`. 
+ + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/28293 + """ X = iris_data y = iris_target - est = NeighborhoodComponentsAnalysis().fit(X, y) + est = NeighborhoodComponentsAnalysis(n_components=n_components).fit(X, y) names_out = est.get_feature_names_out() class_name_lower = est.__class__.__name__.lower() + + if n_components is not None: + expected_n_features = n_components + else: + expected_n_features = X.shape[1] + expected_names_out = np.array( - [f"{class_name_lower}{i}" for i in range(est.components_.shape[1])], + [f"{class_name_lower}{i}" for i in range(expected_n_features)], dtype=object, ) + assert_array_equal(names_out, expected_names_out) From f3310d2f5a1c4d6e2572cc2b2aea924179453a5a Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Thu, 29 Feb 2024 00:54:16 +0800 Subject: [PATCH 22/23] FIX make 'array-like' reject sparse containers in parameter validation (#28101) Co-authored-by: jeremie du boisberranger --- doc/glossary.rst | 1 + sklearn/cluster/_agglomerative.py | 8 ++++---- sklearn/model_selection/_validation.py | 4 ++-- sklearn/neighbors/_graph.py | 8 ++++---- sklearn/preprocessing/_label.py | 4 ++-- sklearn/utils/__init__.py | 6 +++--- sklearn/utils/tests/test_param_validation.py | 5 +++++ sklearn/utils/validation.py | 9 +++++++-- 8 files changed, 28 insertions(+), 17 deletions(-) diff --git a/doc/glossary.rst b/doc/glossary.rst index 75507d977b363..84a628b0f716d 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -66,6 +66,7 @@ General Concepts It excludes: * a :term:`sparse matrix` + * a sparse array * an iterator * a generator diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 884d1605e70c3..2da9d8c5a0f43 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -809,7 +809,7 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): By default, no caching is done. If a string is given, it is the path to the caching directory. - connectivity : array-like or callable, default=None + connectivity : array-like, sparse matrix, or callable, default=None Connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. This can be a connectivity matrix itself or a callable that transforms @@ -929,7 +929,7 @@ class AgglomerativeClustering(ClusterMixin, BaseEstimator): Hidden(None), ], "memory": [str, HasMethods("cache"), None], - "connectivity": ["array-like", callable, None], + "connectivity": ["array-like", "sparse matrix", callable, None], "compute_full_tree": [StrOptions({"auto"}), "boolean"], "linkage": [StrOptions(set(_TREE_BUILDERS.keys()))], "distance_threshold": [Interval(Real, 0, None, closed="left"), None], @@ -1151,7 +1151,7 @@ class FeatureAgglomeration( By default, no caching is done. If a string is given, it is the path to the caching directory. - connectivity : array-like or callable, default=None + connectivity : array-like, sparse matrix, or callable, default=None Connectivity matrix. Defines for each feature the neighboring features following a given structure of the data. 
This can be a connectivity matrix itself or a callable that transforms @@ -1275,7 +1275,7 @@ class FeatureAgglomeration( Hidden(None), ], "memory": [str, HasMethods("cache"), None], - "connectivity": ["array-like", callable, None], + "connectivity": ["array-like", "sparse matrix", callable, None], "compute_full_tree": [StrOptions({"auto"}), "boolean"], "linkage": [StrOptions(set(_TREE_BUILDERS.keys()))], "pooling_func": [callable], diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index df147ce18abc1..176627ace91d4 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -1030,7 +1030,7 @@ def _score(estimator, X_test, y_test, scorer, score_params, error_score="raise") { "estimator": [HasMethods(["fit", "predict"])], "X": ["array-like", "sparse matrix"], - "y": ["array-like", None], + "y": ["array-like", "sparse matrix", None], "groups": ["array-like", None], "cv": ["cv_object"], "n_jobs": [Integral, None], @@ -1087,7 +1087,7 @@ def cross_val_predict( X : {array-like, sparse matrix} of shape (n_samples, n_features) The data to fit. Can be, for example a list, or an array at least 2d. - y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ + y : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs), \ default=None The target variable to try to predict in the case of supervised learning. diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index 2ff27d07514e0..d0456fc59e542 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -45,7 +45,7 @@ def _query_include_self(X, include_self, mode): @validate_params( { - "X": ["array-like", KNeighborsMixin], + "X": ["array-like", "sparse matrix", KNeighborsMixin], "n_neighbors": [Interval(Integral, 1, None, closed="left")], "mode": [StrOptions({"connectivity", "distance"})], "metric": [StrOptions(set(itertools.chain(*VALID_METRICS.values()))), callable], @@ -73,7 +73,7 @@ def kneighbors_graph( Parameters ---------- - X : array-like of shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) Sample data. n_neighbors : int @@ -150,7 +150,7 @@ def kneighbors_graph( @validate_params( { - "X": ["array-like", RadiusNeighborsMixin], + "X": ["array-like", "sparse matrix", RadiusNeighborsMixin], "radius": [Interval(Real, 0, None, closed="both")], "mode": [StrOptions({"connectivity", "distance"})], "metric": [StrOptions(set(itertools.chain(*VALID_METRICS.values()))), callable], @@ -181,7 +181,7 @@ def radius_neighbors_graph( Parameters ---------- - X : array-like of shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) Sample data. radius : float diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 41494f2649a01..48533c7ec8a00 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -419,7 +419,7 @@ def _more_tags(self): @validate_params( { - "y": ["array-like"], + "y": ["array-like", "sparse matrix"], "classes": ["array-like"], "neg_label": [Interval(Integral, None, None, closed="neither")], "pos_label": [Interval(Integral, None, None, closed="neither")], @@ -440,7 +440,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) Parameters ---------- - y : array-like + y : array-like or sparse matrix Sequence of integer labels or multilabel data to encode. 
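
A small sketch of the widened `y` constraint above (assuming a sparse multilabel indicator, which `label_binarize` already handled before this validation fix):

    from scipy.sparse import csr_matrix
    from sklearn.preprocessing import label_binarize

    # y as a sparse multilabel indicator matrix: 2 samples, 3 classes
    y = csr_matrix([[0, 1, 1], [1, 0, 0]])
    Y = label_binarize(y, classes=[0, 1, 2], sparse_output=True)
    # y now passes parameter validation instead of being rejected as non-array-like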
classes : array-like of shape (n_classes,) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index f44c0ca078777..354d8240045ed 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -441,7 +441,7 @@ def _get_column_indices_interchange(X_interchange, key, key_dtype): "replace": ["boolean"], "n_samples": [Interval(numbers.Integral, 1, None, closed="left"), None], "random_state": ["random_state"], - "stratify": ["array-like", None], + "stratify": ["array-like", "sparse matrix", None], }, prefer_skip_nested_validation=True, ) @@ -474,8 +474,8 @@ def resample(*arrays, replace=True, n_samples=None, random_state=None, stratify= Pass an int for reproducible results across multiple function calls. See :term:`Glossary `. - stratify : array-like of shape (n_samples,) or (n_samples, n_outputs), \ - default=None + stratify : {array-like, sparse matrix} of shape (n_samples,) or \ + (n_samples, n_outputs), default=None If not None, data is split in a stratified fashion, using this as the class labels. diff --git a/sklearn/utils/tests/test_param_validation.py b/sklearn/utils/tests/test_param_validation.py index 795fdecfba2e4..dc1176573951f 100644 --- a/sklearn/utils/tests/test_param_validation.py +++ b/sklearn/utils/tests/test_param_validation.py @@ -34,6 +34,7 @@ make_constraint, validate_params, ) +from sklearn.utils.fixes import CSR_CONTAINERS # Some helpers for the tests @@ -405,6 +406,10 @@ def test_generate_valid_param(constraint): ("array-like", [[1, 2], [3, 4]]), ("array-like", np.array([[1, 2], [3, 4]])), ("sparse matrix", csr_matrix([[1, 2], [3, 4]])), + *[ + ("sparse matrix", container([[1, 2], [3, 4]])) + for container in CSR_CONTAINERS + ], ("random_state", 0), ("random_state", np.random.RandomState(0)), ("random_state", None), diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index a5c84ecf6411c..c60b38ca6d721 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -290,6 +290,9 @@ def as_float_array(X, *, copy=True, force_all_finite=True): def _is_arraylike(x): """Returns whether the input is array-like.""" + if sp.issparse(x): + return False + return hasattr(x, "__len__") or hasattr(x, "shape") or hasattr(x, "__array__") @@ -2135,8 +2138,10 @@ def _check_method_params(X, params, indices=None): method_params_validated = {} for param_key, param_value in params.items(): - if not _is_arraylike(param_value) or _num_samples(param_value) != _num_samples( - X + if ( + not _is_arraylike(param_value) + and not sp.issparse(param_value) + or _num_samples(param_value) != _num_samples(X) ): # Non-indexable pass-through (for now for backward-compatibility). # https://github.com/scikit-learn/scikit-learn/issues/15805 From b27894e466e6433ddbd2bca821e263df25acc2c9 Mon Sep 17 00:00:00 2001 From: Xavier Beltran <94066629+xavierbeltran@users.noreply.github.com> Date: Wed, 28 Feb 2024 12:16:42 -0500 Subject: [PATCH 23/23] DOC Added an example for the sklearn.datasets.get_data_home (#28290) Co-authored-by: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> --- sklearn/datasets/_base.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index f75d9aaf49f1d..962bc55a60925 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -67,6 +67,14 @@ def get_data_home(data_home=None) -> str: ------- data_home: str The path to scikit-learn data directory. 
+ + Examples + -------- + >>> import os + >>> from sklearn.datasets import get_data_home + >>> data_home_path = get_data_home() + >>> os.path.exists(data_home_path) + True """ if data_home is None: data_home = environ.get("SCIKIT_LEARN_DATA", join("~", "scikit_learn_data"))
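
As a closing illustration of the lookup order in the hunk above (explicit argument, then the SCIKIT_LEARN_DATA environment variable, then `~/scikit_learn_data`), a sketch with an illustrative path:

    import os
    from sklearn.datasets import get_data_home

    os.environ["SCIKIT_LEARN_DATA"] = "/tmp/sklearn_data_demo"  # illustrative path
    print(get_data_home())  # /tmp/sklearn_data_demo, created if it does not exist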