diff --git a/benchmarks_nonasv/notebooks/might_example_auc.ipynb b/benchmarks_nonasv/notebooks/might_example_auc.ipynb
deleted file mode 100644
index f844e5185..000000000
--- a/benchmarks_nonasv/notebooks/might_example_auc.ipynb
+++ /dev/null
@@ -1,252 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "711de268",
-   "metadata": {},
-   "source": [
-    "## Mutual Information for Genuine Hypothesis Testing (MIGHT)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f7bc7387",
-   "metadata": {},
-   "source": [
-    "An example using `sktree.stats.FeatureImportanceForestClassifier` for nonparametric multivariate hypothesis test, on simulated datasets. Here, we present a simulation of how MIGHT is used to evaluate how a \"feature set is important for predicting the target\".\n",
-    "\n",
-    "We simulate a dataset with 1000 features, 500 samples, and a binary class target variable. Within each feature set, there is 500 features associated with one feature set, and another 500 features associated with another feature set. One could think of these for example as different datasets collected on the same patient in a biomedical setting.\n",
-    "The first feature set (X) is strongly correlated with the target, and the second feature set (W) is weakly correlated with the target (y).\n",
-    "\n",
-    "We then use MIGHT to calculate the partial AUC of these sets."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "55286132",
-   "metadata": {},
-   "source": [
-    "### Installation (WIP)\n",
-    "\n",
-    "```\n",
-    "cd scikit-tree/\n",
-    "git checkout might\n",
-    "git pull\n",
-    "pip install -e .\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "07959f0d",
-   "metadata": {},
-   "source": [
-    "### Import dependencies & set random seed"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "916af34d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "from scipy.special import expit\n",
-    "\n",
-    "from sktree import HonestForestClassifier\n",
-    "from sktree.stats import FeatureImportanceForestClassifier\n",
-    "from sktree.tree import DecisionTreeClassifier\n",
-    "\n",
-    "seed = 12345\n",
-    "rng = np.random.default_rng(seed)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "9ccfa9ac",
-   "metadata": {},
-   "source": [
-    "### Simulate data\n",
-    "\n",
-    "We simulate the two feature sets and the target variable. We then combine them into a single dataset to perform hypothesis testing."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "c479d14e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "n_samples = 1000\n",
-    "n_features_set = 500\n",
-    "mean = 1.0\n",
-    "sigma = 2.0\n",
-    "beta = 5.0\n",
-    "\n",
-    "unimportant_mean = 0.0\n",
-    "unimportant_sigma = 4.5\n",
-    "\n",
-    "# first sample the informative features, and then the uniformative features\n",
-    "X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10))\n",
-    "X_important = np.hstack(\n",
-    "    [\n",
-    "        X_important,\n",
-    "        rng.normal(\n",
-    "            loc=unimportant_mean,\n",
-    "            scale=unimportant_sigma,\n",
-    "            size=(n_samples, n_features_set - 10),\n",
-    "        ),\n",
-    "    ]\n",
-    ")\n",
-    "\n",
-    "X_unimportant = rng.normal(\n",
-    "    loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set)\n",
-    ")\n",
-    "X = np.hstack([X_important, X_unimportant])\n",
-    "\n",
-    "# simulate the binary target variable\n",
-    "y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "7afbe135",
-   "metadata": {},
-   "source": [
-    "### Use partial AUC as test statistic\n",
-    "\n",
-    "You can specify the max FPR in `statistic`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "a3b78a92",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ASH-90 / Partial AUC: 0.6049643724062328\n",
-      "Shape of Observed Samples: (1000,)\n",
-      "Shape of Tree Posteriors for the positive class: (125, 1000, 1)\n"
-     ]
-    }
-   ],
-   "source": [
-    "# parameters that could be changed\n",
-    "n_estimators = 125\n",
-    "max_features = \"sqrt\"\n",
-    "metric = \"auc\"\n",
-    "test_size = 0.2\n",
-    "n_jobs = -1\n",
-    "honest_fraction = 0.7\n",
-    "max_fpr = 0.1\n",
-    "\n",
-    "est = FeatureImportanceForestClassifier(\n",
-    "    estimator=HonestForestClassifier(\n",
-    "        n_estimators=n_estimators,\n",
-    "        max_features=max_features,\n",
-    "        tree_estimator=DecisionTreeClassifier(),\n",
-    "        random_state=seed,\n",
-    "        honest_fraction=honest_fraction,\n",
-    "        n_jobs=n_jobs,\n",
-    "    ),\n",
-    "    random_state=seed,\n",
-    "    test_size=test_size,\n",
-    "    permute_per_tree=True,\n",
-    "    sample_dataset_per_tree=True,\n",
-    ")\n",
-    "\n",
-    "# we test for the first feature set, which is important and thus should return a higher AUC\n",
-    "stat, posterior_arr, samples = est.statistic(\n",
-    "    X_important,\n",
-    "    y,\n",
-    "    metric=metric,\n",
-    "    return_posteriors=True,\n",
-    ")\n",
-    "\n",
-    "print(\"ASH-90 / Partial AUC:\", stat)\n",
-    "print(\"Shape of Observed Samples:\", samples.shape)\n",
-    "print(\"Shape of Tree Posteriors for the positive class:\", posterior_arr.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "b41d2a8e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ASH-90 / Partial AUC: 0.4944796805261922\n",
-      "Shape of Observed Samples: (1000,)\n",
-      "Shape of Tree Posteriors for the positive class: (125, 1000, 1)\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Repeat for the second feature set\n",
-    "stat, posterior_arr, samples = est.statistic(\n",
-    "    X_unimportant,\n",
-    "    y,\n",
-    "    metric=metric,\n",
-    "    return_posteriors=True,\n",
-    ")\n",
-    "\n",
-    "print(\"ASH-90 / Partial AUC:\", stat)\n",
-    "print(\"Shape of Observed Samples:\", samples.shape)\n",
-    "print(\"Shape of Tree Posteriors for the positive class:\", posterior_arr.shape)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "02e69f1c",
-   "metadata": {},
-   "source": [
"source": [ - "### All posteriors are saved within the model\n", - "\n", - "Extract the results from the model variables anytime. You can save the model with `pickle`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e8d8f2f", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"ASH-90 / Partial AUC:\", est.observe_stat_)\n", - "print(\"Observed Samples:\", est.observe_samples_)\n", - "print(\"Tree Posteriors for the positive class:\", est.observe_posteriors_) # (n_trees, n_samples_test, 1)\n", - "print(\"True Labels:\", est.y_true_final_)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/plot_might_auc.py b/examples/plot_might_auc.py new file mode 100644 index 000000000..27161fdc1 --- /dev/null +++ b/examples/plot_might_auc.py @@ -0,0 +1,127 @@ +""" +=================================================================================== +Compute partial AUC using Mutual Information for Genuine Hypothesis Testing (MIGHT) +=================================================================================== + +An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric +multivariate hypothesis test, on simulated datasets. Here, we present a simulation +of how MIGHT is used to evaluate how a "feature set is important for predicting the target". + +We simulate a dataset with 1000 features, 500 samples, and a binary class target +variable. Within each feature set, there is 500 features associated with one feature +set, and another 500 features associated with another feature set. One could think of +these for example as different datasets collected on the same patient in a biomedical setting. +The first feature set (X) is strongly correlated with the target, and the second +feature set (W) is weakly correlated with the target (y). + +We then use MIGHT to calculate the partial AUC of these sets. +""" + +import numpy as np +from scipy.special import expit + +from sktree import HonestForestClassifier +from sktree.stats import FeatureImportanceForestClassifier +from sktree.tree import DecisionTreeClassifier + +seed = 12345 +rng = np.random.default_rng(seed) + +# %% +# Simulate data +# ------------- +# We simulate the two feature sets, and the target variable. We then combine them +# into a single dataset to perform hypothesis testing. 
+
+n_samples = 1000
+n_features_set = 500
+mean = 1.0
+sigma = 2.0
+beta = 5.0
+
+unimportant_mean = 0.0
+unimportant_sigma = 4.5
+
+# first sample the informative features, and then the uninformative features
+X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10))
+X_important = np.hstack(
+    [
+        X_important,
+        rng.normal(
+            loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10)
+        ),
+    ]
+)
+
+X_unimportant = rng.normal(
+    loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set)
+)
+
+# simulate the binary target variable
+y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples)
+
+# %%
+# Use partial AUC as test statistic
+# ---------------------------------
+# You can compute the partial AUC by passing the maximum false positive rate
+# ``max_fpr`` to ``statistic``. Here, ``max_fpr = 0.1`` evaluates the AUC over
+# the high-specificity region of the ROC curve.
+
+n_estimators = 125
+max_features = "sqrt"
+metric = "auc"
+test_size = 0.2
+n_jobs = -1
+honest_fraction = 0.7
+max_fpr = 0.1
+
+est = FeatureImportanceForestClassifier(
+    estimator=HonestForestClassifier(
+        n_estimators=n_estimators,
+        max_features=max_features,
+        tree_estimator=DecisionTreeClassifier(),
+        random_state=seed,
+        honest_fraction=honest_fraction,
+        n_jobs=n_jobs,
+    ),
+    random_state=seed,
+    test_size=test_size,
+    permute_per_tree=True,
+    sample_dataset_per_tree=True,
+)
+
+# we test the first feature set, which is important and thus should return a higher AUC
+stat, posterior_arr, samples = est.statistic(
+    X_important,
+    y,
+    metric=metric,
+    return_posteriors=True,
+    max_fpr=max_fpr,
+)
+
+print(f"ASH-90 / Partial AUC: {stat}")
+print(f"Shape of Observed Samples: {samples.shape}")
+print(f"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}")
+
+# %%
+# Repeat for the second feature set
+# ---------------------------------
+# This feature set should yield a smaller statistic, which is expected since it
+# carries no information about the target.
+
+stat, posterior_arr, samples = est.statistic(
+    X_unimportant,
+    y,
+    metric=metric,
+    return_posteriors=True,
+    max_fpr=max_fpr,
+)
+
+print(f"ASH-90 / Partial AUC: {stat}")
+print(f"Shape of Observed Samples: {samples.shape}")
+print(f"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}")
+
+# %%
+# All posteriors are saved within the model
+# -----------------------------------------
+# The results are stored as fitted attributes, so you can extract them from the
+# model at any time. You can also save the fitted model with ``pickle``.
+
+print(f"ASH-90 / Partial AUC: {est.observe_stat_}")
+print(f"Observed Samples: {est.observe_samples_}")
+print(f"Tree Posteriors for the positive class: {est.observe_posteriors_}")  # (n_trees, n_samples_test, 1)
+print(f"True Labels: {est.y_true_final_}")
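+
+# %%
+# As a minimal sketch of that last point (the file name ``might_model.pkl`` is
+# just an illustration), the fitted model and all of its stored posteriors can
+# be persisted and reloaded with ``pickle``:
+
+import pickle
+
+# serialize the fitted estimator, including the stored posteriors and statistics
+with open("might_model.pkl", "wb") as f:
+    pickle.dump(est, f)
+
+# reload it later and read off the saved results without refitting
+with open("might_model.pkl", "rb") as f:
+    est_loaded = pickle.load(f)
+
+print(f"Reloaded ASH-90 / Partial AUC: {est_loaded.observe_stat_}")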