From 042cd02d80a78129957b92ed8070bcd262dae23a Mon Sep 17 00:00:00 2001
From: Sambit Panda <36676569+sampan501@users.noreply.github.com>
Date: Tue, 10 Oct 2023 20:01:10 -0400
Subject: [PATCH] Fix for MIGHT p-value shuffling all columns by default (#140)

* fix for coleman algorithm API

---------

Signed-off-by: Sampan <36676569+sampan501@users.noreply.github.com>
Co-authored-by: Adam Li <adam2392@gmail.com>
---
 doc/whats_new/v0.3.rst              |  2 ++
 sktree/experimental/sdf.py          |  1 -
 sktree/stats/forestht.py            |  6 +++-
 sktree/stats/tests/test_forestht.py | 44 ++++++++++++++++++++++++-----
 4 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/doc/whats_new/v0.3.rst b/doc/whats_new/v0.3.rst
index 238fcb9ca..ebae29995 100644
--- a/doc/whats_new/v0.3.rst
+++ b/doc/whats_new/v0.3.rst
@@ -13,6 +13,7 @@ Version 0.3
 Changelog
 ---------
 - |Fix| Fixes a bug in consistency of train/test samples when ``random_state`` is not set in FeatureImportanceForestClassifier and FeatureImportanceForestRegressor, by `Adam Li`_ (:pr:`135`)
+- |Fix| Fixes a bug where covariate indices were not shuffled by default when running FeatureImportanceForestClassifier and FeatureImportanceForestRegressor test methods, by `Sambit Panda`_ (:pr:`140`)
 
 Code and Documentation Contributors
 -----------------------------------
@@ -21,4 +22,5 @@ Thanks to everyone who has contributed to the maintenance and improvement of
 the project since version inception, including:
 
 * `Adam Li`_
+* `Sambit Panda`_
 
diff --git a/sktree/experimental/sdf.py b/sktree/experimental/sdf.py
index fafa028ae..a34f5c6e5 100644
--- a/sktree/experimental/sdf.py
+++ b/sktree/experimental/sdf.py
@@ -108,7 +108,6 @@ def __init__(
         monotonic_cst=None,
         n_swaps=1,
     ):
-
         super().__init__(
             n_estimators=n_estimators,
             criterion=criterion,
diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index 4c425b17f..aa9e81e9b 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -429,7 +429,8 @@ def test(
         y : ArrayLike of shape (n_samples, n_outputs)
             The target matrix.
         covariate_index : ArrayLike, optional of shape (n_covariates,)
-            The index array of covariates to shuffle, by default None.
+            The index array of covariates to shuffle, will shuffle all columns by
+            default (corresponding to None).
         metric : str, optional
             The metric to compute, by default "mse".
         n_repeats : int, optional
@@ -465,6 +466,9 @@ def test(
             observe_stat = self.observe_stat_
 
         # next permute the data
+        if covariate_index is None:
+            covariate_index = np.arange(X.shape[1], dtype=int)
+
         permute_stat, permute_posteriors, permute_samples = self.statistic(
             X,
             y,
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index f63894be7..c16eecbe2 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -358,11 +358,11 @@ def test_permute_per_tree_samples_consistency_with_sklearnforest(seed, sample_da
 
     # if the sample_dataset_per_tree, then the indices should be different across all
     if sample_dataset_per_tree:
-        for (indices, other_indices) in combinations(clf.train_test_samples_, 2):
+        for indices, other_indices in combinations(clf.train_test_samples_, 2):
             assert not np.array_equal(indices[0], other_indices[0])
             assert not np.array_equal(indices[1], other_indices[1])
     else:
-        for (indices, other_indices) in combinations(clf.train_test_samples_, 2):
+        for indices, other_indices in combinations(clf.train_test_samples_, 2):
             assert_array_equal(indices[0], other_indices[0])
             assert_array_equal(indices[1], other_indices[1])
 
@@ -387,7 +387,7 @@ def test_permute_per_tree_samples_consistency_with_sklearnforest(seed, sample_da
 
 
 @pytest.mark.parametrize("seed", [None, 0])
-def test_small_dataset(seed):
+def test_small_dataset_independent(seed):
     n_samples = 32
     n_features = 5
 
@@ -408,13 +408,43 @@ def test_small_dataset(seed):
     assert pvalue > 0.05
 
     stat, pvalue = clf.test(X, y, metric="mi")
-    if seed is not None:
-        assert stat == 0.0
-    else:
-        assert_almost_equal(stat, 0.0, decimal=1)
+    assert_almost_equal(stat, 0.0, decimal=1)
     assert pvalue > 0.05
 
 
+@flaky(max_runs=3)
+@pytest.mark.parametrize("seed", [None, 0])
+def test_small_dataset_dependent(seed):
+    n_samples = 100
+    n_features = 5
+    rng = np.random.default_rng(seed)
+
+    X = rng.uniform(size=(n_samples, n_features))
+    X = rng.uniform(size=(n_samples // 2, n_features))
+    X2 = X + 3
+    X = np.vstack([X, X2])
+    y = np.vstack(
+        [np.zeros((n_samples // 2, 1)), np.ones((n_samples // 2, 1))]
+    )  # Binary classification
+    print(X.shape, y.shape)
+
+    clf = FeatureImportanceForestClassifier(
+        estimator=HonestForestClassifier(
+            n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.5
+        ),
+        test_size=0.2,
+        permute_per_tree=False,
+        sample_dataset_per_tree=False,
+    )
+    stat, pvalue = clf.test(X, y, covariate_index=[1, 2], metric="mi")
+    assert ~np.isnan(pvalue)
+    assert ~np.isnan(stat)
+    assert pvalue <= 0.05
+
+    stat, pvalue = clf.test(X, y, metric="mi")
+    assert pvalue <= 0.05
+
+
 # @pytest.mark.monitor_test
 # def test_memory_usage():
 #     n_samples = 1000