From 8f7d52fc013e3f3ec85636284f6876ad0ccfeebe Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Tue, 17 Oct 2023 11:59:39 -0400
Subject: [PATCH 1/4] FIX add max_fpr argument to auc example

---
 examples/hypothesis_testing/plot_might_auc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/hypothesis_testing/plot_might_auc.py b/examples/hypothesis_testing/plot_might_auc.py
index 101ad5b7f..fd032972a 100644
--- a/examples/hypothesis_testing/plot_might_auc.py
+++ b/examples/hypothesis_testing/plot_might_auc.py
@@ -94,6 +94,7 @@
     y,
     metric=metric,
     return_posteriors=True,
+    max_fpr=max_fpr,
 )
 
 print(f"ASH-90 / Partial AUC: {stat}")
@@ -110,6 +111,7 @@
     y,
     metric=metric,
     return_posteriors=True,
+    max_fpr=max_fpr,
 )
 
 print(f"ASH-90 / Partial AUC: {stat}")

From 9ad065efbf41b0acf41a2978d1068f6b532ac64e Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Tue, 17 Oct 2023 14:36:46 -0400
Subject: [PATCH 2/4] [DOC] Adding citation file (#144)

* Adding citation file

---------

Signed-off-by: Adam Li <adam2392@gmail.com>
Co-authored-by: Sambit Panda <sampanda501@gmail.com>
Co-authored-by: Haoyin Xu <haoyinxu@gmail.com>
---
 .github/workflows/cffconvert.yml | 22 ++++++++++++++++++++++
 CITATION.cff                     | 31 +++++++++++++++++++++++++++++++
 README.md                        |  1 +
 sktree/_lib/sklearn_fork         |  2 +-
 style_requirements.txt           |  9 +++++++++
 5 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/cffconvert.yml
 create mode 100644 CITATION.cff
 create mode 100644 style_requirements.txt

diff --git a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml
new file mode 100644
index 000000000..60fd8f62b
--- /dev/null
+++ b/.github/workflows/cffconvert.yml
@@ -0,0 +1,22 @@
+name: cffconvert
+
+on:
+  push:
+    paths:
+      - CITATION.cff
+  pull_request:
+    paths:
+      - CITATION.cff        
+
+jobs:
+  validate:
+    name: "validate"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out a copy of the repository
+        uses: actions/checkout@v4
+
+      - name: Check whether the citation metadata from CITATION.cff is valid
+        uses: citation-file-format/cffconvert-github-action@2.0.0
+        with:
+          args: "--validate"
\ No newline at end of file
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 000000000..e3bf96722
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,31 @@
+# YAML 1.2
+---
+# Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/)
+cff-version: 1.2.0
+title: "Scikit-tree: Modern decision-trees compatible with scikit-learn in Python."
+abstract: "scikit-tree is a scikit-learn compatible API for building state-of-the-art decision trees. These include unsupervised trees, oblique trees, uncertainty trees, quantile trees and causal trees."
+authors:
+  - given-names: Adam
+    family-names: Li
+    affiliation: "Department of Computer Science, Columbia University, New York, NY, USA"
+    orcid: "https://orcid.org/0000-0001-8421-365X"
+  - given-names: Sambit
+    family-names: Panda
+    affiliation: "Department of Biomedical Engineering, Johns Hopkins University, Baltimore, MD, USA"
+    orcid: "https://orcid.org/0000-0001-8455-4243"
+  - given-names: Haoyin
+    family-names: Xu
+    affiliation: "Department of Biomedical Engineering, Johns Hopkins University, Baltimore, MD, USA"
+    orcid: "https://orcid.org/0000-0001-8235-4950"
+type: software
+repository-code: "https://github.com/neurodata/scikit-tree"
+license: 'BSD-3-Clause'
+keywords:
+  - random forest
+  - oblique trees
+  - honest forests
+  - statisical learning
+  - machine learning
+message: >-
+  Please cite this software using the metadata from
+  'preferred-citation' in the CITATION.cff file.
diff --git a/README.md b/README.md
index 12076150c..ac815d9ec 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@
 [![codecov](https://codecov.io/gh/neurodata/scikit-tree/branch/main/graph/badge.svg?token=H1reh7Qwf4)](https://codecov.io/gh/neurodata/scikit-tree)
 [![PyPI Download count](https://img.shields.io/pypi/dm/scikit-tree.svg)](https://pypistats.org/packages/scikit-tree)
 [![Latest PyPI release](https://img.shields.io/pypi/v/scikit-tree.svg)](https://pypi.org/project/scikit-tree/)
+[![DOI](https://zenodo.org/badge/491260497.svg)](https://zenodo.org/doi/10.5281/zenodo.8412279)
 
 scikit-tree
 ===========
diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork
index 6c7a5f44e..1adb20907 160000
--- a/sktree/_lib/sklearn_fork
+++ b/sktree/_lib/sklearn_fork
@@ -1 +1 @@
-Subproject commit 6c7a5f44eb4ec3bea5dd6a9e4d5db748d12b209e
+Subproject commit 1adb209077f12adac8f760196ae5260abab0cbdd
diff --git a/style_requirements.txt b/style_requirements.txt
new file mode 100644
index 000000000..c3fb899a6
--- /dev/null
+++ b/style_requirements.txt
@@ -0,0 +1,9 @@
+mypy
+black
+isort
+flake8
+bandit
+pydocstyle
+codespell
+toml
+cython-lint
\ No newline at end of file

From 860d1979503de104b5f993c17164dc36353a82f9 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Thu, 19 Oct 2023 10:47:55 -0400
Subject: [PATCH 3/4] [DOC] Adding development docs to make sure it is clear
 how to do dev (#150)

* Adding development docs to make sure it is clear how to do dev

Signed-off-by: Adam Li <adam2392@gmail.com>

* Update reqs

Signed-off-by: Adam Li <adam2392@gmail.com>

* Fix rendering of cli commands

Signed-off-by: Adam Li <adam2392@gmail.com>

---------

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 DEVELOPING.md    | 122 ++++++++++++++++++++++++++++++++++++++++++++---
 requirements.txt |   2 +-
 2 files changed, 117 insertions(+), 7 deletions(-)

diff --git a/DEVELOPING.md b/DEVELOPING.md
index 200789d7c..9fdc18090 100644
--- a/DEVELOPING.md
+++ b/DEVELOPING.md
@@ -1,14 +1,103 @@
+<!-- TOC -->
+
+- [Requirements](#requirements)
+- [Setting up your development environment](#setting-up-your-development-environment)
+- [Building the project from source](#building-the-project-from-source)
+- [Development Tasks](#development-tasks)
+        - [Basic Verification](#basic-verification)
+        - [Docsite](#docsite)
+    - [Details](#details)
+        - [Coding Style](#coding-style)
+        - [Lint](#lint)
+        - [Type checking](#type-checking)
+        - [Unit tests](#unit-tests)
+- [Advanced Updating submodules](#advanced-updating-submodules)
+- [Cython and C++](#cython-and-c)
+- [Making a Release](#making-a-release)
+
+<!-- /TOC -->
+
 # Requirements
-* Python 3.8+
-* Poetry (`curl -sSL https://install.python-poetry.org | python - --version=1.2.2`)
 
-For the other requirements, inspect the ``pyproject.toml`` file. If you are updated the dependencies, please run `poetry update` to update the
+* Python 3.9+
+* numpy>=1.25
+* scipy>=1.11
+* scikit-learn>=1.3.1
+
+For the other requirements, inspect the ``pyproject.toml`` file.
+
+# Setting up your development environment
+
+We recommend using miniconda, as python virtual environments may not setup properly compilers necessary for our compiled code. For detailed information on setting up and managing conda environments, see https://conda.io/docs/test-drive.html.
+
+<!-- Setup a conda env -->
+
+    conda create -n sktree
+    conda activate sktree
+
+**Make sure you specify a Python version if your system defaults to anything less than Python 3.9.**
+
+**Any commands should ALWAYS be after you have activated your conda environment.**
+Next, install necessary build dependencies. For more information, see https://scikit-learn.org/stable/developers/advanced_installation.html.
+
+    conda install -c conda-forge joblib threadpoolctl pytest compilers llvm-openmp
+
+Assuming these steps have worked properly and you have read and followed any necessary scikit-learn advanced installation instructions, you can then install dependencies for scikit-tree.
+
+If you are developing locally, you will need the build dependencies to compile the Cython / C++ code:
+
+    pip install -r build_requirements.txt
+
+Other requirements can be installed as such:
+
+    pip install -r requirements.txt
+    pip install -r style_requirements.txt
+    pip install -r test_requirements.txt
+    pip install -r doc_requirements.txt
+
+# Building the project from source
+
+We leverage meson to build scikit-tree from source. We utilize a CLI tool, called [spin](https://github.com/scientific-python/spin), which wraps certain meson commands to make building easier.
+
+For example, the following command will build the project completely from scratch
+
+    spin build --clean
+
+If you have part of the build already done, you can run:
+
+    spin build
+
+The following command will test the project
+
+    spin test
+
+For other commands, see
+
+    spin --help
+
+Note at this stage, you will be unable to run Python commands directly. For example, ``pytest ./sktree`` will not work.
+
+However, after installing and building the project from source using meson, you can leverage editable installs to make testing code changes much faster. For more information on meson-python's progress supporting editable installs in a better fashion, see https://meson-python.readthedocs.io/en/latest/how-to-guides/editable-installs.html.
+
+    pip install --no-build-isolation --editable .
+
+**Note: editable installs for scikit-tree REQUIRE you to have built the project using meson already.** This will now link the meson build to your Python runtime. Now if you run
+
+    pytest ./sktree
+
+the unit-tests should run.
 
 # Development Tasks
-There are a series of top-level tasks available through Poetry. These can each be run via
+There are a series of top-level tasks available through Poetry. If you are updated the dependencies, please run `poetry update` to update the lock file. These can each be run via
 
  `poetry run poe <taskname>`
 
+To do so, first install poetry and poethepoet.
+
+    pip install poetry poethepoet
+
+Now, you are ready to run quick commands to format the codebase, lint the codebase and type-check the codebase.
+
 ### Basic Verification
 * **format** - runs the suite of formatting tools applying tools to make code compliant
 * **format_check** - runs the suite of formatting tools checking for compliance
@@ -53,6 +142,23 @@ In order for any code to be added to the repository, we require unit tests to pa
 
     poetry run poe unit_test
 
+# (Advanced) Updating submodules
+
+Scikit-tree relies on a submodule of a forked-version of scikit-learn for certain Python and Cython code that extends the ``DecisionTree*`` models. Usually, if a developer is making changes, they should go over to the ``submodulev3`` branch on ``https://github.com/neurodata/scikit-learn`` and 
+submit a PR to make changes to the submodule. 
+
+This should **ALWAYS** be supported by some use-case in scikit-tree. We want the minimal amount of code-change in our forked version of scikit-learn to make it very easy to merge in upstream changes, bug fixes and features for tree-based code.
+
+Once a PR is submitted and merged, the developer can update the submodule here in scikit-tree, so that we leverage the new commit. You **must** update the submodule commit ID and also commit this change, so that way the build leverages the new submodule commit ID.
+
+    git submodule update --init --recursive --remote
+    git add -A
+    git commit -m "Update submodule" -s
+
+Now, you can re-build the project using the latest submodule changes.
+
+    spin build --clean
+
 # Cython and C++
 The general design of scikit-tree follows that of the tree-models inside scikit-learn, where tree-based models are inherently Cythonized, or written with C++. Then the actual forest (e.g. RandomForest, or ExtraForest) is just a Python API wrapper that creates an ensemble of the trees.
 
@@ -68,13 +174,17 @@ https://github.com/neurodata/scikit-tree/actions/workflows/build_wheels.yml will
 
 2. Upload wheels to test PyPi
 
-    twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+```
+twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+```
 
 Verify that installations work as expected on your machine.
 
 3. Upload wheels
 
-    twine upload dist/*
+```
+twine upload dist/*
+```
 
 or if you have two-factor authentication enabled: https://pypi.org/help/#apitoken
 
diff --git a/requirements.txt b/requirements.txt
index dc42ea7e7..99963814e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 numpy>=1.25
-scipy
+scipy>=1.11
 scikit-learn>=1.3.1

From 359ea75740910513e582f5fc6b5a74d7e6da03d9 Mon Sep 17 00:00:00 2001
From: Yuxin <99897042+YuxinB@users.noreply.github.com>
Date: Thu, 19 Oct 2023 14:05:34 -0400
Subject: [PATCH 4/4] Stratify sampling when split train/test data (#143)

* Stratify sampling when split train/test data

---------

Co-authored-by: Haoyin Xu <haoyinxu@gmail.com>
Co-authored-by: Adam Li <adam2392@gmail.com>
Co-authored-by: Sambit Panda <36676569+sampan501@users.noreply.github.com>
---
 doc/whats_new/_contributors.rst               |  1 +
 doc/whats_new/v0.3.rst                        |  3 +-
 ...t_MI_genuine_hypothesis_testing_forest.py} | 16 +++----
 .../plot_MI_imbalanced_hyppo_testing.py       |  8 ++--
 requirements.txt                              |  1 +
 sktree/stats/forestht.py                      | 42 ++++++++++++-------
 sktree/stats/tests/test_forestht.py           | 32 ++++++++++++++
 7 files changed, 76 insertions(+), 27 deletions(-)
 rename examples/hypothesis_testing/{plot_MI_gigantic_hypothesis_testing_forest.py => plot_MI_genuine_hypothesis_testing_forest.py} (94%)

diff --git a/doc/whats_new/_contributors.rst b/doc/whats_new/_contributors.rst
index 3e5ca2110..eb441d66d 100644
--- a/doc/whats_new/_contributors.rst
+++ b/doc/whats_new/_contributors.rst
@@ -26,3 +26,4 @@
 .. _SUKI-O : https://github.com/SUKI-O
 .. _Ronan Perry : https://rflperry.github.io/
 .. _Haoyin Xu : https://github.com/PSSF23
+.. _Yuxin Bai : https://github.com/YuxinB
diff --git a/doc/whats_new/v0.3.rst b/doc/whats_new/v0.3.rst
index fec97bb01..7b163ef19 100644
--- a/doc/whats_new/v0.3.rst
+++ b/doc/whats_new/v0.3.rst
@@ -15,6 +15,7 @@ Changelog
 - |Fix| Fixes a bug in consistency of train/test samples when ``random_state`` is not set in FeatureImportanceForestClassifier and FeatureImportanceForestRegressor, by `Adam Li`_ (:pr:`135`)
 - |Fix| Fixes a bug where covariate indices were not shuffled by default when running FeatureImportanceForestClassifier and FeatureImportanceForestRegressor test methods, by `Sambit Panda`_ (:pr:`140`)
 - |Enhancement| Add multi-view splitter for axis-aligned decision trees, by `Adam Li`_ (:pr:`129`)
+- |Enhancement| Add stratified sampling option to ``FeatureImportance*`` via the ``stratify`` keyword argument, by `Yuxin Bai`_ (:pr:`143`)
 
 Code and Documentation Contributors
 -----------------------------------
@@ -24,4 +25,4 @@ the project since version inception, including:
 
 * `Adam Li`_
 * `Sambit Panda`_
-
+* `Yuxin Bai`_
diff --git a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/hypothesis_testing/plot_MI_genuine_hypothesis_testing_forest.py
similarity index 94%
rename from examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py
rename to examples/hypothesis_testing/plot_MI_genuine_hypothesis_testing_forest.py
index 423bc63dc..e6831a9e7 100644
--- a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py
+++ b/examples/hypothesis_testing/plot_MI_genuine_hypothesis_testing_forest.py
@@ -1,7 +1,7 @@
 """
-===========================================================
-Mutual Information for Gigantic Hypothesis Testing (MIGHT)
-===========================================================
+=========================================================
+Mutual Information for Genuine Hypothesis Testing (MIGHT)
+=========================================================
 
 An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric
 multivariate hypothesis test, on simulated datasets. Here, we present a simulation
@@ -49,8 +49,8 @@
 # We simulate the two feature sets, and the target variable. We then combine them
 # into a single dataset to perform hypothesis testing.
 
-n_samples = 1000
-n_features_set = 500
+n_samples = 2000
+n_features_set = 20
 mean = 1.0
 sigma = 2.0
 beta = 5.0
@@ -91,7 +91,7 @@
 # computed as the proportion of samples in the null distribution that are less than the
 # observed test statistic.
 
-n_estimators = 200
+n_estimators = 100
 max_features = "sqrt"
 test_size = 0.2
 n_repeats = 1000
@@ -103,12 +103,12 @@
         max_features=max_features,
         tree_estimator=DecisionTreeClassifier(),
         random_state=seed,
-        honest_fraction=0.7,
+        honest_fraction=0.25,
         n_jobs=n_jobs,
     ),
     random_state=seed,
     test_size=test_size,
-    permute_per_tree=True,
+    permute_per_tree=False,
     sample_dataset_per_tree=False,
 )
 
diff --git a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
index 882f80c3d..c8a5478a4 100644
--- a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
+++ b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
@@ -1,7 +1,7 @@
 """
-===============================================================================
-Mutual Information for Gigantic Hypothesis Testing (MIGHT) with Imbalanced Data
-===============================================================================
+==============================================================================
+Mutual Information for Genuine Hypothesis Testing (MIGHT) with Imbalanced Data
+==============================================================================
 
 Here, we demonstrate how to do hypothesis testing on highly imbalanced data
 in terms of their feature-set dimensionalities.
@@ -17,7 +17,7 @@
 
 For other examples of hypothesis testing, see the following:
 
-- :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_MI_gigantic_hypothesis_testing_forest.py`
+- :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_MI_genuine_hypothesis_testing_forest.py`
 - :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_might_auc.py`
 
 For more information on the multi-view decision-tree, see
diff --git a/requirements.txt b/requirements.txt
index 99963814e..978f90fce 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 numpy>=1.25
 scipy>=1.11
 scikit-learn>=1.3.1
+
diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index 4d6dc7b77..56a044c5c 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -122,6 +122,7 @@ def __init__(
         test_size=0.2,
         permute_per_tree=True,
         sample_dataset_per_tree=True,
+        stratify=False,
     ):
         self.estimator = estimator
         self.random_state = random_state
@@ -129,6 +130,7 @@ def __init__(
         self.test_size = test_size
         self.permute_per_tree = permute_per_tree
         self.sample_dataset_per_tree = sample_dataset_per_tree
+        self.stratify = stratify
 
         self.n_samples_test_ = None
         self._n_samples_ = None
@@ -160,8 +162,9 @@ def reset(self):
         self.n_features_in_ = None
         self._is_fitted = False
         self._seeds = None
+        self._y = None
 
-    def _get_estimators_indices(self, sample_separate=False):
+    def _get_estimators_indices(self, stratifier=None, sample_separate=False):
         indices = np.arange(self._n_samples_, dtype=int)
 
         # Get drawn indices along both sample and feature axes
@@ -191,7 +194,11 @@ def _get_estimators_indices(self, sample_separate=False):
                 # Operations accessing random_state must be performed identically
                 # to those in `_parallel_build_trees()`
                 indices_train, indices_test = train_test_split(
-                    indices, test_size=self.test_size, shuffle=True, random_state=seed
+                    indices,
+                    test_size=self.test_size,
+                    shuffle=True,
+                    stratify=stratifier,
+                    random_state=seed,
                 )
 
                 yield indices_train, indices_test
@@ -202,12 +209,13 @@ def _get_estimators_indices(self, sample_separate=False):
                 else:
                     self._seeds = self.estimator_.random_state
 
-            # TODO: make random_state consistent
             indices_train, indices_test = train_test_split(
                 indices,
                 test_size=self.test_size,
+                stratify=stratifier,
                 random_state=self._seeds,
             )
+
             for _ in self.estimator_.estimators_:
                 yield indices_train, indices_test
 
@@ -227,9 +235,12 @@ def train_test_samples_(self):
         if self._n_samples_ is None:
             raise RuntimeError("The estimator must be fitted before accessing this attribute.")
 
+        # Stratifier uses a cached _y attribute if available
+        stratifier = self._y if is_classifier(self.estimator_) and self.stratify else None
+
         return [
             (indices_train, indices_test)
-            for indices_train, indices_test in self._get_estimators_indices()
+            for indices_train, indices_test in self._get_estimators_indices(stratifier=stratifier)
         ]
 
     def _statistic(
@@ -329,6 +340,8 @@ def statistic(
 
         if self._n_samples_ is None:
             self._n_samples_, self.n_features_in_ = X.shape
+
+        # Infer type of target y
         if self._type_of_target_ is None:
             self._type_of_target_ = type_of_target(y)
 
@@ -339,9 +352,9 @@ def statistic(
             self.permuted_estimator_ = self._get_estimator()
             estimator = self.permuted_estimator_
 
-        # Infer type of target y
-        if not hasattr(self, "_type_of_target"):
-            self._type_of_target_ = type_of_target(y)
+        # Store a cache of the y variable
+        if is_classifier(self._get_estimator()):
+            self._y = y.copy()
 
         # XXX: this can be improved as an extra fit can be avoided, by just doing error-checking
         # and then setting the internal meta data structures
@@ -462,10 +475,10 @@ def test(
             observe_posteriors = self.observe_posteriors_
             observe_stat = self.observe_stat_
 
-        # next permute the data
         if covariate_index is None:
             covariate_index = np.arange(X.shape[1], dtype=int)
 
+        # next permute the data
         permute_stat, permute_posteriors, permute_samples = self.statistic(
             X,
             y,
@@ -724,9 +737,7 @@ def _statistic(
                     self.permute_per_tree,
                     self._type_of_target_,
                 )
-                for idx, (indices_train, indices_test) in enumerate(
-                    self._get_estimators_indices(sample_separate=True)
-                )
+                for idx, (indices_train, indices_test) in enumerate(self.train_test_samples_)
             )
         else:
             # fitting a forest will only get one unique train/test split
@@ -825,6 +836,9 @@ class FeatureImportanceForestClassifier(BaseForestHT):
     sample_dataset_per_tree : bool, default=False
         Whether to sample the dataset per tree or per forest.
 
+    stratify : bool, default=True
+        Whether to stratify the samples by class labels.
+
     Attributes
     ----------
     estimator_ : BaseForest
@@ -877,6 +891,7 @@ def __init__(
         test_size=0.2,
         permute_per_tree=True,
         sample_dataset_per_tree=True,
+        stratify=True,
     ):
         super().__init__(
             estimator=estimator,
@@ -885,6 +900,7 @@ def __init__(
             test_size=test_size,
             permute_per_tree=permute_per_tree,
             sample_dataset_per_tree=sample_dataset_per_tree,
+            stratify=stratify,
         )
 
     def _get_estimator(self):
@@ -945,9 +961,7 @@ def _statistic(
                     self.permute_per_tree,
                     self._type_of_target_,
                 )
-                for idx, (indices_train, indices_test) in enumerate(
-                    self._get_estimators_indices(sample_separate=True)
-                )
+                for idx, (indices_train, indices_test) in enumerate(self.train_test_samples_)
             )
         else:
             # fitting a forest will only get one unique train/test split
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index cecf34b8c..e71e5e09b 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -69,6 +69,38 @@ def test_featureimportance_forest_permute_pertree(sample_dataset_per_tree):
         est.statistic(iris_X[:n_samples], iris_y[:n_samples], [0, 1.0], metric="mi")
 
 
+@pytest.mark.parametrize("sample_dataset_per_tree", [True, False])
+def test_featureimportance_forest_stratified(sample_dataset_per_tree):
+    est = FeatureImportanceForestClassifier(
+        estimator=RandomForestClassifier(
+            n_estimators=10,
+            random_state=seed,
+        ),
+        permute_per_tree=True,
+        test_size=0.7,
+        random_state=seed,
+        sample_dataset_per_tree=sample_dataset_per_tree,
+    )
+    n_samples = 100
+    est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
+
+    _, indices_test = est.train_test_samples_[0]
+    y_test = iris_y[indices_test]
+
+    assert len(y_test[y_test == 0]) == len(y_test[y_test == 1]), (
+        f"{len(y_test[y_test==0])} " f"{len(y_test[y_test==1])}"
+    )
+
+    est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mi")
+
+    _, indices_test = est.train_test_samples_[0]
+    y_test = iris_y[indices_test]
+
+    assert len(y_test[y_test == 0]) == len(y_test[y_test == 1]), (
+        f"{len(y_test[y_test==0])} " f"{len(y_test[y_test==1])}"
+    )
+
+
 def test_featureimportance_forest_errors():
     permute_per_tree = False
     sample_dataset_per_tree = True