diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 613e791b7bb78..38f7a33ab9651 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -255,6 +255,10 @@ Changelog
   which returns a dense numpy ndarray as before.
   :pr:`27438` by :user:`Yao Xiao <Charlie-XIAO>`.
 
+- |API| :func:`datasets.make_sparse_spd_matrix` deprecated the keyword argument ``dim``
+  in favor of ``n_dim``. ``dim`` will be removed in version 1.6.
+  :pr:`27718` by :user:`Adam Li <adam2392>`.
+
 :mod:`sklearn.decomposition`
 ............................
 
diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py
index 828b44e8c7859..cd0bb4b3dbba8 100644
--- a/sklearn/datasets/_samples_generator.py
+++ b/sklearn/datasets/_samples_generator.py
@@ -1571,7 +1571,7 @@ def make_spd_matrix(n_dim, *, random_state=None):
 
 @validate_params(
     {
-        "dim": [Interval(Integral, 1, None, closed="left")],
+        "n_dim": [Hidden(None), Interval(Integral, 1, None, closed="left")],
         "alpha": [Interval(Real, 0, 1, closed="both")],
         "norm_diag": ["boolean"],
         "smallest_coef": [Interval(Real, 0, 1, closed="both")],
@@ -1581,11 +1581,15 @@ def make_spd_matrix(n_dim, *, random_state=None):
             None,
         ],
         "random_state": ["random_state"],
+        "dim": [
+            Interval(Integral, 1, None, closed="left"),
+            Hidden(StrOptions({"deprecated"})),
+        ],
     },
     prefer_skip_nested_validation=True,
 )
 def make_sparse_spd_matrix(
-    dim=1,
+    n_dim=None,
     *,
     alpha=0.95,
     norm_diag=False,
@@ -1593,6 +1597,7 @@ def make_sparse_spd_matrix(
     largest_coef=0.9,
     sparse_format=None,
     random_state=None,
+    dim="deprecated",
 ):
     """Generate a sparse symmetric definite positive matrix.
 
@@ -1600,9 +1605,12 @@ def make_sparse_spd_matrix(
 
     Parameters
     ----------
-    dim : int, default=1
+    n_dim : int, default=1
         The size of the random matrix to generate.
 
+        .. versionchanged:: 1.4
+            Renamed from ``dim`` to ``n_dim``.
+
     alpha : float, default=0.95
         The probability that a coefficient is zero (see notes). Larger values
         enforce more sparsity. The value should be in the range 0 and 1.
@@ -1628,6 +1636,12 @@ def make_sparse_spd_matrix(
         for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
 
+    dim : int, default=1
+        The size of the random matrix to generate.
+
+        .. deprecated:: 1.4
+            `dim` is deprecated and will be removed in 1.6.
+
     Returns
     -------
-    prec : ndarray or sparse matrix of shape (dim, dim)
+    prec : ndarray or sparse matrix of shape (n_dim, n_dim)
@@ -1646,10 +1660,32 @@ def make_sparse_spd_matrix(
     """
     random_state = check_random_state(random_state)
 
-    chol = -sp.eye(dim)
+    # TODO(1.6): remove in 1.6
+    # Also make sure to change `n_dim` default back to 1 and deprecate None
+    if n_dim is not None and dim != "deprecated":
+        raise ValueError(
+            "`dim` and `n_dim` cannot be both specified. Please use `n_dim` only "
+            "as `dim` is deprecated in v1.4 and will be removed in v1.6."
+        )
+
+    if dim != "deprecated":
+        warnings.warn(
+            (
+                "dim was deprecated in version 1.4 and will be removed in 1.6. "
+                "Please use ``n_dim`` instead."
+            ),
+            FutureWarning,
+        )
+        _n_dim = dim
+    elif n_dim is None:
+        _n_dim = 1
+    else:
+        _n_dim = n_dim
+
+    chol = -sp.eye(_n_dim)
     aux = sp.random(
-        m=dim,
-        n=dim,
+        m=_n_dim,
+        n=_n_dim,
         density=1 - alpha,
         data_rvs=lambda x: random_state.uniform(
             low=smallest_coef, high=largest_coef, size=x
@@ -1661,7 +1697,7 @@ def make_sparse_spd_matrix(
 
     # Permute the lines: we don't want to have asymmetries in the final
     # SPD matrix
-    permutation = random_state.permutation(dim)
+    permutation = random_state.permutation(_n_dim)
     aux = aux[permutation].T[permutation]
     chol += aux
     prec = chol.T @ chol
diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py
index 82637cbaa90df..6c5d822163e63 100644
--- a/sklearn/datasets/tests/test_samples_generator.py
+++ b/sklearn/datasets/tests/test_samples_generator.py
@@ -559,15 +559,15 @@ def test_make_spd_matrix():
     "sparse_format", [None, "bsr", "coo", "csc", "csr", "dia", "dok", "lil"]
 )
 def test_make_sparse_spd_matrix(norm_diag, sparse_format, global_random_seed):
-    dim = 5
+    n_dim = 5
     X = make_sparse_spd_matrix(
-        dim=dim,
+        n_dim=n_dim,
         norm_diag=norm_diag,
         sparse_format=sparse_format,
         random_state=global_random_seed,
     )
 
-    assert X.shape == (dim, dim), "X shape mismatch"
+    assert X.shape == (n_dim, n_dim), "X shape mismatch"
     if sparse_format is None:
         assert not sp.issparse(X)
         assert_allclose(X, X.T)
@@ -585,7 +585,27 @@ def test_make_sparse_spd_matrix(norm_diag, sparse_format, global_random_seed):
 
     if norm_diag:
         # Check that leading diagonal elements are 1
-        assert_array_almost_equal(Xarr.diagonal(), np.ones(dim))
+        assert_array_almost_equal(Xarr.diagonal(), np.ones(n_dim))
+
+
+# TODO(1.6): remove
+def test_make_sparse_spd_matrix_deprecation_warning():
+    """Check the message for future deprecation."""
+    warn_msg = "dim was deprecated in version 1.4"
+    with pytest.warns(FutureWarning, match=warn_msg):
+        make_sparse_spd_matrix(
+            dim=1,
+        )
+
+    error_msg = "`dim` and `n_dim` cannot be both specified"
+    with pytest.raises(ValueError, match=error_msg):
+        make_sparse_spd_matrix(
+            dim=1,
+            n_dim=1,
+        )
+
+    X = make_sparse_spd_matrix()
+    assert X.shape[1] == 1
 
 
 @pytest.mark.parametrize("hole", [False, True])