From fe69ce4d39dfffe9c3e434be1cdd300ebe059a16 Mon Sep 17 00:00:00 2001 From: Diane Napolitano Date: Wed, 31 Jan 2024 17:13:46 -0500 Subject: [PATCH] Removing the bootstrap matrix solver from this branch in favor of it living in the ELEX-3830 branch --- setup.py | 2 +- src/elexsolver/TransitionMatrixSolver.py | 91 --------- tests/test_transition_matrix_solver.py | 238 +---------------------- 3 files changed, 2 insertions(+), 329 deletions(-) diff --git a/setup.py b/setup.py index 959e7497..9c1fcf59 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup -INSTALL_REQUIRES = ["cvxpy~=1.4", "numpy~=1.26", "pymc~=5.10", "scipy~=1.12", "tqdm~=4.66"] +INSTALL_REQUIRES = ["cvxpy~=1.4", "numpy~=1.26", "pymc~=5.10", "scipy~=1.12"] THIS_FILE_DIR = os.path.dirname(__file__) diff --git a/src/elexsolver/TransitionMatrixSolver.py b/src/elexsolver/TransitionMatrixSolver.py index 38ac22e7..f9e6dd58 100644 --- a/src/elexsolver/TransitionMatrixSolver.py +++ b/src/elexsolver/TransitionMatrixSolver.py @@ -3,7 +3,6 @@ import cvxpy as cp import numpy as np -from tqdm import tqdm from elexsolver.logging import initialize_logging from elexsolver.TransitionSolver import TransitionSolver @@ -107,93 +106,3 @@ def fit_predict(self, X: np.ndarray, Y: np.ndarray, weights: np.ndarray | None = percentages = self.__solve(X, Y, weights) self._transitions = np.diag(X_expected_totals) @ percentages return percentages - - -class BootstrapTransitionMatrixSolver(TransitionSolver): - """ - Bootstrap version of the matrix regression transition solver. - """ - - def __init__(self, B: int = 1000, strict: bool = True, verbose: bool = True, lam: int | None = None): - """ - Parameters - ---------- - `B` : int, default 1000 - Number of bootstrap samples to draw and matrix solver models to fit/predict. - `strict` : bool, default True - If `True`, solution will be constrainted so that all coefficients are >= 0, - <= 1, and the sum of each row equals 1. - `verbose` : bool, default True - If `False`, this will reduce the amount of logging produced for each of the `B` bootstrap samples. - `lam` : float, optional - `lam` != 0 will enable L2 regularization (Ridge). - """ - super().__init__() - self._strict = strict - self._B = B - self._verbose = verbose - self._lambda = lam - - # class members that are instantiated during model-fit - self._predicted_percentages = None - self._X_expected_totals = None - - def fit_predict(self, X: np.ndarray, Y: np.ndarray, weights: np.ndarray | None = None) -> np.ndarray: - self._predicted_percentages = [] - - # assuming pandas.DataFrame - if not isinstance(X, np.ndarray): - X = X.to_numpy() - if not isinstance(Y, np.ndarray): - Y = Y.to_numpy() - - self._X_expected_totals = X.sum(axis=0) / X.sum(axis=0).sum() - - tm = TransitionMatrixSolver(strict=self._strict, lam=self._lambda) - self._predicted_percentages.append(tm.fit_predict(X, Y, weights=weights)) - - for b in tqdm(range(0, self._B - 1), desc="Bootstrapping", disable=not self._verbose): - rng = np.random.default_rng(seed=b) - X_resampled = rng.choice( - X, len(X), replace=True, axis=0, p=(weights / weights.sum() if weights is not None else None) - ) - indices = [np.where((X == x).all(axis=1))[0][0] for x in X_resampled] - Y_resampled = Y[indices] - self._predicted_percentages.append(tm.fit_predict(X_resampled, Y_resampled, weights=None)) - - percentages = np.mean(self._predicted_percentages, axis=0) - self._transitions = np.diag(self._X_expected_totals) @ percentages - return percentages - - def get_confidence_interval(self, alpha: float, transitions: bool = False) -> (np.ndarray, np.ndarray): - """ - Parameters - ---------- - `alpha` : float - Value between [0, 1). If greater than 1, will be divided by 100. - `transitions` : bool, default False - If True, the returned matrices will represent transitions, not percentages. - - Returns - ------- - A tuple of two np.ndarray matrices of float. Element 0 has the lower bound and 1 has the upper bound. - """ - if alpha > 1: - alpha = alpha / 100 - if alpha < 0 or alpha >= 1: - raise ValueError(f"Invalid confidence interval {alpha}.") - - p_lower = ((1.0 - alpha) / 2.0) * 100 - p_upper = ((1.0 + alpha) / 2.0) * 100 - - percentages = ( - np.percentile(self._predicted_percentages, p_lower, axis=0), - np.percentile(self._predicted_percentages, p_upper, axis=0), - ) - - if transitions: - return ( - np.diag(self._X_expected_totals) @ percentages[0], - np.diag(self._X_expected_totals) @ percentages[1], - ) - return percentages diff --git a/tests/test_transition_matrix_solver.py b/tests/test_transition_matrix_solver.py index a11ffdcd..44d4a4bd 100644 --- a/tests/test_transition_matrix_solver.py +++ b/tests/test_transition_matrix_solver.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from elexsolver.TransitionMatrixSolver import BootstrapTransitionMatrixSolver, TransitionMatrixSolver +from elexsolver.TransitionMatrixSolver import TransitionMatrixSolver RTOL = 1e-04 ATOL = 1e-04 @@ -228,239 +228,3 @@ def test_matrix_fit_predict_pandas(): except ImportError: # pass this test through since pandas isn't a requirement for elex-solver assert True - - -def test_bootstrap_fit_predict(): - X = np.array( - [ - [1, 2], - [3, 4], - [5, 6], - [7, 8], - [9, 10], - [11, 12], - ] - ) - - Y = np.array( - [ - [2, 3], - [4, 5], - [6, 7], - [8, 9], - [10, 11], - [12, 13], - ] - ) - - expected = np.array([[0.809393, 0.190607], [0.173843, 0.826157]]) - - btms = BootstrapTransitionMatrixSolver(B=10, verbose=False) - current = btms.fit_predict(X, Y) - np.testing.assert_allclose(expected, current, rtol=RTOL, atol=ATOL) - - -def test_bootstrap_fit_predict_with_weights(): - X = np.array( - [ - [1, 2], - [3, 4], - [5, 6], - [7, 8], - [9, 10], - [11, 12], - ] - ) - - Y = np.array( - [ - [2, 3], - [4, 5], - [6, 7], - [8, 9], - [10, 11], - [12, 13], - ] - ) - - weights = np.array([500, 250, 125, 62.5, 31.25, 15.625]) - - expected = np.array([[0.739798, 0.260202], [0.229358, 0.770642]]) - - btms = BootstrapTransitionMatrixSolver(B=10, verbose=False) - current = btms.fit_predict(X, Y, weights=weights) - np.testing.assert_allclose(expected, current, rtol=RTOL, atol=ATOL) - - -def test_bootstrap_confidence_interval_percentages(): - X = np.array( - [ - [1, 2], - [3, 4], - [5, 6], - [7, 8], - [9, 10], - [11, 12], - ] - ) - - Y = np.array( - [ - [2, 3], - [4, 5], - [6, 7], - [8, 9], - [10, 11], - [12, 13], - ] - ) - - expected_lower = np.array([[0.757573, 0.095978], [0.09128, 0.779471]]) - expected_upper = np.array([[0.904022, 0.242427], [0.220529, 0.90872]]) - - btms = BootstrapTransitionMatrixSolver(B=10, verbose=False) - _ = btms.fit_predict(X, Y) - (current_lower, current_upper) = btms.get_confidence_interval(0.95, transitions=False) - np.testing.assert_allclose(expected_lower, current_lower, rtol=RTOL, atol=ATOL) - np.testing.assert_allclose(expected_upper, current_upper, rtol=RTOL, atol=ATOL) - - -def test_bootstrap_confidence_interval_greater_than_1(): - X = np.array( - [ - [1, 2], - [3, 4], - [5, 6], - [7, 8], - [9, 10], - [11, 12], - ] - ) - - Y = np.array( - [ - [2, 3], - [4, 5], - [6, 7], - [8, 9], - [10, 11], - [12, 13], - ] - ) - - expected_lower = np.array([[0.757573, 0.095978], [0.09128, 0.779471]]) - expected_upper = np.array([[0.904022, 0.242427], [0.220529, 0.90872]]) - - btms = BootstrapTransitionMatrixSolver(B=10, verbose=False) - _ = btms.fit_predict(X, Y) - (current_lower, current_upper) = btms.get_confidence_interval(95, transitions=False) - np.testing.assert_allclose(expected_lower, current_lower, rtol=RTOL, atol=ATOL) - np.testing.assert_allclose(expected_upper, current_upper, rtol=RTOL, atol=ATOL) - - -def test_bootstrap_confidence_interval_invalid(): - X = np.array( - [ - [1, 2], - [3, 4], - [5, 6], - [7, 8], - [9, 10], - [11, 12], - ] - ) - - Y = np.array( - [ - [2, 3], - [4, 5], - [6, 7], - [8, 9], - [10, 11], - [12, 13], - ] - ) - - btms = BootstrapTransitionMatrixSolver(B=10, verbose=False) - _ = btms.fit_predict(X, Y) - - with pytest.raises(ValueError): - btms.get_confidence_interval(-34) - - -def test_bootstrap_confidence_interval_transitions(): - X = np.array( - [ - [1, 2], - [3, 4], - [5, 6], - [7, 8], - [9, 10], - [11, 12], - ] - ) - - Y = np.array( - [ - [2, 3], - [4, 5], - [6, 7], - [8, 9], - [10, 11], - [12, 13], - ] - ) - - expected_lower = np.array([[0.349649, 0.044297], [0.049151, 0.419715]]) - expected_upper = np.array([[0.417241, 0.111889], [0.118746, 0.489311]]) - - btms = BootstrapTransitionMatrixSolver(B=10, verbose=False) - _ = btms.fit_predict(X, Y) - (current_lower, current_upper) = btms.get_confidence_interval(0.95, transitions=True) - np.testing.assert_allclose(expected_lower, current_lower, rtol=RTOL, atol=ATOL) - np.testing.assert_allclose(expected_upper, current_upper, rtol=RTOL, atol=ATOL) - - -def test_bootstrap_get_prediction_interval(): - btms = BootstrapTransitionMatrixSolver(B=10, verbose=False) - with pytest.raises(NotImplementedError): - btms.get_prediction_interval(0) - - -def test_bootstrap_fit_predict_pandas(): - try: - import pandas # pylint: disable=import-outside-toplevel - - X = pandas.DataFrame( - [ - [1, 2], - [3, 4], - [5, 6], - [7, 8], - [9, 10], - [11, 12], - ], - columns=["x1", "x2"], - ) - - Y = pandas.DataFrame( - [ - [2, 3], - [4, 5], - [6, 7], - [8, 9], - [10, 11], - [12, 13], - ], - columns=["y1", "y2"], - ) - - expected = np.array([[0.809393, 0.190607], [0.173843, 0.826157]]) - - btms = BootstrapTransitionMatrixSolver(B=10, verbose=False) - current = btms.fit_predict(X, Y) - np.testing.assert_allclose(expected, current, rtol=RTOL, atol=ATOL) - - except ImportError: - # pass this test through since pandas isn't a requirement for elex-solver - assert True