From ea8ce18f7f27600d9428fc08a7c0cedc3c4b3c74 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 11 Oct 2023 10:44:04 -0400 Subject: [PATCH 1/9] Adding multiview example Signed-off-by: Adam Li --- examples/plot_multiview_dtc.py | 127 +++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 examples/plot_multiview_dtc.py diff --git a/examples/plot_multiview_dtc.py b/examples/plot_multiview_dtc.py new file mode 100644 index 000000000..bd67940db --- /dev/null +++ b/examples/plot_multiview_dtc.py @@ -0,0 +1,127 @@ +""" +============================================================ +Analyze a multi-view dataset with a multi-view random forest +============================================================ + +An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric +multivariate hypothesis test, on simulated datasets. Here, we present a simulation +of how MIGHT is used to evaluate how a "feature set is important for predicting the target". + +We simulate a dataset with 1000 features, 500 samples, and a binary class target +variable. Within each feature set, there is 500 features associated with one feature +set, and another 500 features associated with another feature set. One could think of +these for example as different datasets collected on the same patient in a biomedical setting. +The first feature set (X) is strongly correlated with the target, and the second +feature set (W) is weakly correlated with the target (y). + +We then use MIGHT to calculate the partial AUC of these sets. +""" + +import numpy as np +from scipy.special import expit + +from sktree import HonestForestClassifier +from sktree.stats import FeatureImportanceForestClassifier +from sktree.tree import DecisionTreeClassifier + +seed = 12345 +rng = np.random.default_rng(seed) + +# %% +# Simulate data +# ------------- +# We simulate the two feature sets, and the target variable. We then combine them +# into a single dataset to perform hypothesis testing. + +n_samples = 1000 +n_features_set = 500 +mean = 1.0 +sigma = 2.0 +beta = 5.0 + +unimportant_mean = 0.0 +unimportant_sigma = 4.5 + +# first sample the informative features, and then the uniformative features +X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10)) +X_important = np.hstack( + [ + X_important, + rng.normal( + loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10) + ), + ] +) + +X_unimportant = rng.normal( + loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set) +) + +# simulate the binary target variable +y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples) + +# %% +# Use partial AUC as test statistic +# --------------------------------- +# You can specify the maximum specificity by modifying ``max_fpr`` in ``statistic``. 
+ +n_estimators = 125 +max_features = "sqrt" +metric = "auc" +test_size = 0.2 +n_jobs = -1 +honest_fraction = 0.7 +max_fpr = 0.1 + +est = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=n_estimators, + max_features=max_features, + tree_estimator=DecisionTreeClassifier(), + random_state=seed, + honest_fraction=honest_fraction, + n_jobs=n_jobs, + ), + random_state=seed, + test_size=test_size, + permute_per_tree=True, + sample_dataset_per_tree=True, +) + +# we test for the first feature set, which is important and thus should return a higher AUC +stat, posterior_arr, samples = est.statistic( + X_important, + y, + metric=metric, + return_posteriors=True, +) + +print(f"ASH-90 / Partial AUC: {stat}") +print(f"Shape of Observed Samples: {samples.shape}") +print(f"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}") + +# %% +# Repeat for the second feature set +# --------------------------------- +# This feature set has a smaller statistic, which is expected due to its weak correlation. + +stat, posterior_arr, samples = est.statistic( + X_unimportant, + y, + metric=metric, + return_posteriors=True, +) + +print(f"ASH-90 / Partial AUC: {stat}") +print(f"Shape of Observed Samples: {samples.shape}") +print(f"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}") + +# %% +# All posteriors are saved within the model +# ----------------------------------------- +# Extract the results from the model variables anytime. You can save the model with ``pickle``. +# +# ASH-90 / Partial AUC: ``est.observe_stat_`` +# Observed Samples: ``est.observe_samples_`` +# Tree Posteriors for the positive class: ``est.observe_posteriors_`` (n_trees, n_samples_test, 1) +# True Labels: ``est.y_true_final_`` From 3ac096e89f0956e5eb646d93f972da21acddec6f Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 11 Oct 2023 12:18:40 -0400 Subject: [PATCH 2/9] Merge main Signed-off-by: Adam Li --- .../plot_multiview_axis_aligned_splitter.py | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 examples/splitters/plot_multiview_axis_aligned_splitter.py diff --git a/examples/splitters/plot_multiview_axis_aligned_splitter.py b/examples/splitters/plot_multiview_axis_aligned_splitter.py new file mode 100644 index 000000000..6663f6fd6 --- /dev/null +++ b/examples/splitters/plot_multiview_axis_aligned_splitter.py @@ -0,0 +1,127 @@ +""" +================================================================================= +Demonstrate and visualize a multi-view projection matrix for an axis-aligned tree +================================================================================= + +This example shows how multi-view projection matrices are generated for an oblique tree, +specifically the :class:`sktree.tree.ObliqueDecisionTreeClassifier`. + +Multi-view projection matrices operate under the assumption that the input ``X`` array +consists of multiple feature-sets that are groups of features important for predicting +``y``. + +For details on how to use the hyperparameters related to the multi-view, see +:class:`sktree.tree.ObliqueDecisionTreeClassifier`. +""" + +# import modules +# .. note:: We use a private Cython module here to demonstrate what the patches +# look like. This is not part of the public API. The Cython module used +# is just a Python wrapper for the underlying Cython code and is not the +# same as the Cython splitter used in the actual implementation. 
+# To use the actual splitter, one should use the public API for the +# relevant tree/forests class. + +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.cm import ScalarMappable +from matplotlib.colors import ListedColormap + +from sktree._lib.sklearn.tree._criterion import Gini +from sktree.tree._oblique_splitter import MultiViewSplitterTester + +criterion = Gini(1, np.array((0, 1))) +max_features = 5 +min_samples_leaf = 1 +min_weight_leaf = 0.0 +random_state = np.random.RandomState(10) + +# we "simulate" three feature sets, with 3, 2 and 4 features respectively +feature_set_ends = np.array([3, 5, 9], dtype=np.intp) +n_feature_sets = len(feature_set_ends) + +feature_combinations = 1 +monotonic_cst = None +missing_value_feature_mask = None + +# initialize some dummy data +X = np.repeat(np.arange(feature_set_ends[-1]).astype(np.float32), 5).reshape(5, -1) +y = np.array([0, 0, 0, 1, 1]).reshape(-1, 1).astype(np.float64) +sample_weight = np.ones(5) + +print("The shape of our dataset is: ", X.shape, y.shape, sample_weight.shape) + +# %% +# Initialize the multi-view splitter +# ---------------------------------- +# The multi-view splitter is a Cython class that is initialized internally +# in scikit-tree. However, we expose a Python tester object to demonstrate +# how the splitter works in practice. +# +# .. warning:: Do not use this interface directly in practice. + +splitter = MultiViewSplitterTester( + criterion, + max_features, + min_samples_leaf, + min_weight_leaf, + random_state, + monotonic_cst, + feature_combinations, + feature_set_ends, + n_feature_sets, +) +splitter.init_test(X, y, sample_weight, missing_value_feature_mask) + +# %% +# Sample the projection matrix +# ---------------------------- +# The projection matrix is sampled by the splitter. The projection +# matrix is a (max_features, n_features) matrix that selects which features of ``X`` +# to define candidate split dimensions. The multi-view +# splitter's projection matrix though samples from multiple feature sets, +# which are aligned contiguously over the columns of ``X``. + +projection_matrix = splitter.sample_projection_matrix_py() +print(projection_matrix) + +cmap = ListedColormap(["white", "green"][:n_feature_sets]) + +# Create a heatmap to visualize the indices +fig, ax = plt.subplots(figsize=(6, 6)) + +ax.imshow( + projection_matrix, cmap=cmap, aspect=feature_set_ends[-1] / max_features, interpolation="none" +) +ax.axvline(feature_set_ends[0] - 0.5, color="black", linewidth=1, label="Feature Sets") +for iend in feature_set_ends[1:]: + ax.axvline(iend - 0.5, color="black", linewidth=1) + +ax.set(title="Sampled Projection Matrix", xlabel="Feature Index", ylabel="Projection Vector Index") +ax.set_xticks(np.arange(feature_set_ends[-1])) +ax.set_yticks(np.arange(max_features)) +ax.set_yticklabels(np.arange(max_features, dtype=int) + 1) +ax.set_xticklabels(np.arange(feature_set_ends[-1], dtype=int) + 1) +ax.legend() + +# Create a mappable object +sm = ScalarMappable(cmap=cmap) +sm.set_array([]) # You can set an empty array or values here + +# Create a color bar with labels for each feature set +colorbar = fig.colorbar(sm, ax=ax, ticks=[0.25, 0.75], format="%d") +colorbar.set_label("Projection Weight (I.e. Sampled Feature From a Feature Set)") +colorbar.ax.set_yticklabels(["0", "1"]) + +plt.show() + +# %% +# Discussion +# ---------- +# As we can see, the multi-view splitter samples split candidates uniformly across the feature sets. 
+# In contrast, the normal splitter in :class:`sklearn.tree.DecisionTreeClassifier` samples +# randomly across all ``n_features`` features because it is not aware of the multi-view structure. +# This is the key difference between the two splitters. +# +# For an example of using the multi-view splitter in practice on simulated data, see +# :ref:`sphx_glr_auto_examples_plot_multiview_dtc.py`. From 139219ec119cbc15f7b304b79be59b66adfb3620 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 12 Oct 2023 11:35:12 -0400 Subject: [PATCH 3/9] Temp wip Signed-off-by: Adam Li --- .spin/cmds.py | 2 +- ...gigantic_hypothesis_testing_forest copy.py | 138 ++++++++++++++++++ .../plot_MI_imbalanced_hyppo_testing.py | 138 ++++++++++++++++++ examples/multiview/README.txt | 6 + examples/multiview/plot_multiview_dtc.py | 124 ++++++++++++++++ 5 files changed, 407 insertions(+), 1 deletion(-) create mode 100644 examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest copy.py create mode 100644 examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py create mode 100644 examples/multiview/README.txt create mode 100644 examples/multiview/plot_multiview_dtc.py diff --git a/.spin/cmds.py b/.spin/cmds.py index 5071dc0a4..a71f863be 100644 --- a/.spin/cmds.py +++ b/.spin/cmds.py @@ -54,7 +54,7 @@ def setup_submodule(forcesubmodule=False): This will update the submodule, which then must be commited so that git knows the submodule needs to be at a certain commit hash. """ - commit_fpath = "./sktree/_lib/sklearn_fork/commit.txt" + commit_fpath = "./sktree/_lib/commit.txt" submodule = "./sktree/_lib/sklearn_fork" commit = "" current_hash = "" diff --git a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest copy.py b/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest copy.py new file mode 100644 index 000000000..423bc63dc --- /dev/null +++ b/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest copy.py @@ -0,0 +1,138 @@ +""" +=========================================================== +Mutual Information for Gigantic Hypothesis Testing (MIGHT) +=========================================================== + +An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric +multivariate hypothesis test, on simulated datasets. Here, we present a simulation +of how MIGHT is used to test the hypothesis that a "feature set is important for +predicting the target". This is a generalization of the framework presented in +:footcite:`coleman2022scalable`. + +We simulate a dataset with 1000 features, 500 samples, and a binary class target +variable. Within each feature set, there is 500 features associated with one feature +set, and another 500 features associated with another feature set. One could think of +these for example as different datasets collected on the same patient in a biomedical setting. +The first feature set (X) is strongly correlated with the target, and the second +feature set (W) is weakly correlated with the target (y). Here, we are testing the +null hypothesis: + +- ``H0: I(X; y) - I(X, W; y) = 0`` +- ``HA: I(X; y) - I(X, W; y) < 0`` indicating that there is more mutual information with + respect to ``y`` + +where ``I`` is mutual information. For example, this could be true in the following settings, +where X is our informative feature set and W is our uninformative feature set. + +- ``W X -> y``: here ``W`` is completely disconnected from X and y. +- ``W -> X -> y``: here ``W`` is d-separated from y given X. 
+- ``W <- X -> y``: here ``W`` is d-separated from y given X. + +We then use MIGHT to test the hypothesis that the first feature set is important for +predicting the target, and the second feature set is not important for predicting the +target. We use :class:`~sktree.stats.FeatureImportanceForestClassifier`. +""" + +import numpy as np +from scipy.special import expit + +from sktree import HonestForestClassifier +from sktree.stats import FeatureImportanceForestClassifier +from sktree.tree import DecisionTreeClassifier + +seed = 12345 +rng = np.random.default_rng(seed) + +# %% +# Simulate data +# ------------- +# We simulate the two feature sets, and the target variable. We then combine them +# into a single dataset to perform hypothesis testing. + +n_samples = 1000 +n_features_set = 500 +mean = 1.0 +sigma = 2.0 +beta = 5.0 + +unimportant_mean = 0.0 +unimportant_sigma = 4.5 + +# first sample the informative features, and then the uniformative features +X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10)) +X_important = np.hstack( + [ + X_important, + rng.normal( + loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10) + ), + ] +) + +X_unimportant = rng.normal( + loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set) +) +X = np.hstack([X_important, X_unimportant]) + +# simulate the binary target variable +y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples) + +# %% +# Perform hypothesis testing using Mutual Information +# --------------------------------------------------- +# Here, we use :class:`~sktree.stats.FeatureImportanceForestClassifier` to perform the hypothesis +# test. The test statistic is computed by comparing the metric (i.e. mutual information) estimated +# between two forests. One forest is trained on the original dataset, and one forest is trained +# on a permuted dataset, where the rows of the ``covariate_index`` columns are shuffled randomly. +# +# The null distribution is then estimated in an efficient manner using the framework of +# :footcite:`coleman2022scalable`. The sample evaluations of each forest (i.e. the posteriors) +# are sampled randomly ``n_repeats`` times to generate a null distribution. The pvalue is then +# computed as the proportion of samples in the null distribution that are less than the +# observed test statistic. 
+ +n_estimators = 200 +max_features = "sqrt" +test_size = 0.2 +n_repeats = 1000 +n_jobs = -1 + +est = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=n_estimators, + max_features=max_features, + tree_estimator=DecisionTreeClassifier(), + random_state=seed, + honest_fraction=0.7, + n_jobs=n_jobs, + ), + random_state=seed, + test_size=test_size, + permute_per_tree=True, + sample_dataset_per_tree=False, +) + +print( + f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: " + f"{est.sample_dataset_per_tree}" +) +# we test for the first feature set, which is important and thus should return a pvalue < 0.05 +stat, pvalue = est.test( + X, y, covariate_index=np.arange(n_features_set, dtype=int), metric="mi", n_repeats=n_repeats +) +print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") + +# we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05 +stat, pvalue = est.test( + X, + y, + covariate_index=np.arange(n_features_set, dtype=int) + n_features_set, + metric="mi", + n_repeats=n_repeats, +) +print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") + +# %% +# References +# ---------- +# .. footbibliography:: diff --git a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py new file mode 100644 index 000000000..14c260725 --- /dev/null +++ b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py @@ -0,0 +1,138 @@ +""" +=============================================================================== +Mutual Information for Gigantic Hypothesis Testing (MIGHT) with Imbalanced Data +=============================================================================== + +An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric +multivariate hypothesis test, on simulated datasets. Here, we present a simulation +of how MIGHT is used to test the hypothesis that a "feature set is important for +predicting the target". This is a generalization of the framework presented in +:footcite:`coleman2022scalable`. + +We simulate a dataset with 1000 features, 500 samples, and a binary class target +variable. Within each feature set, there is 500 features associated with one feature +set, and another 500 features associated with another feature set. One could think of +these for example as different datasets collected on the same patient in a biomedical setting. +The first feature set (X) is strongly correlated with the target, and the second +feature set (W) is weakly correlated with the target (y). Here, we are testing the +null hypothesis: + +- ``H0: I(X; y) - I(X, W; y) = 0`` +- ``HA: I(X; y) - I(X, W; y) < 0`` indicating that there is more mutual information with + respect to ``y`` + +where ``I`` is mutual information. For example, this could be true in the following settings, +where X is our informative feature set and W is our uninformative feature set. + +- ``W X -> y``: here ``W`` is completely disconnected from X and y. +- ``W -> X -> y``: here ``W`` is d-separated from y given X. +- ``W <- X -> y``: here ``W`` is d-separated from y given X. + +We then use MIGHT to test the hypothesis that the first feature set is important for +predicting the target, and the second feature set is not important for predicting the +target. We use :class:`~sktree.stats.FeatureImportanceForestClassifier`. 
+""" + +import numpy as np +from scipy.special import expit + +from sktree import HonestForestClassifier +from sktree.stats import FeatureImportanceForestClassifier +from sktree.tree import DecisionTreeClassifier + +seed = 12345 +rng = np.random.default_rng(seed) + +# %% +# Simulate data +# ------------- +# We simulate the two feature sets, and the target variable. We then combine them +# into a single dataset to perform hypothesis testing. + +n_samples = 1000 +n_features_set = 500 +mean = 1.0 +sigma = 2.0 +beta = 5.0 + +unimportant_mean = 0.0 +unimportant_sigma = 4.5 + +# first sample the informative features, and then the uniformative features +X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10)) +X_important = np.hstack( + [ + X_important, + rng.normal( + loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10) + ), + ] +) + +X_unimportant = rng.normal( + loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set) +) +X = np.hstack([X_important, X_unimportant]) + +# simulate the binary target variable +y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples) + +# %% +# Perform hypothesis testing using Mutual Information +# --------------------------------------------------- +# Here, we use :class:`~sktree.stats.FeatureImportanceForestClassifier` to perform the hypothesis +# test. The test statistic is computed by comparing the metric (i.e. mutual information) estimated +# between two forests. One forest is trained on the original dataset, and one forest is trained +# on a permuted dataset, where the rows of the ``covariate_index`` columns are shuffled randomly. +# +# The null distribution is then estimated in an efficient manner using the framework of +# :footcite:`coleman2022scalable`. The sample evaluations of each forest (i.e. the posteriors) +# are sampled randomly ``n_repeats`` times to generate a null distribution. The pvalue is then +# computed as the proportion of samples in the null distribution that are less than the +# observed test statistic. + +n_estimators = 200 +max_features = "sqrt" +test_size = 0.2 +n_repeats = 1000 +n_jobs = -1 + +est = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=n_estimators, + max_features=max_features, + tree_estimator=DecisionTreeClassifier(), + random_state=seed, + honest_fraction=0.7, + n_jobs=n_jobs, + ), + random_state=seed, + test_size=test_size, + permute_per_tree=True, + sample_dataset_per_tree=False, +) + +print( + f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: " + f"{est.sample_dataset_per_tree}" +) +# we test for the first feature set, which is important and thus should return a pvalue < 0.05 +stat, pvalue = est.test( + X, y, covariate_index=np.arange(n_features_set, dtype=int), metric="mi", n_repeats=n_repeats +) +print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") + +# we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05 +stat, pvalue = est.test( + X, + y, + covariate_index=np.arange(n_features_set, dtype=int) + n_features_set, + metric="mi", + n_repeats=n_repeats, +) +print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") + +# %% +# References +# ---------- +# .. footbibliography:: diff --git a/examples/multiview/README.txt b/examples/multiview/README.txt new file mode 100644 index 000000000..127ad6e57 --- /dev/null +++ b/examples/multiview/README.txt @@ -0,0 +1,6 @@ +.. 
_multiview_examples: + +Multi-view learning with Decision-trees +--------------------------------------- + +Examples demonstrating multi-view learning using random forest variants. diff --git a/examples/multiview/plot_multiview_dtc.py b/examples/multiview/plot_multiview_dtc.py new file mode 100644 index 000000000..84089547e --- /dev/null +++ b/examples/multiview/plot_multiview_dtc.py @@ -0,0 +1,124 @@ +""" +============================================================ +Analyze a multi-view dataset with a multi-view random forest +============================================================ + +An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric +multivariate hypothesis test, on simulated datasets. Here, we present a simulation +of how MIGHT is used to evaluate how a "feature set is important for predicting the target". + +We simulate a dataset with 1000 features, 500 samples, and a binary class target +variable. Within each feature set, there is 500 features associated with one feature +set, and another 500 features associated with another feature set. One could think of +these for example as different datasets collected on the same patient in a biomedical setting. +The first feature set (X) is strongly correlated with the target, and the second +feature set (W) is weakly correlated with the target (y). + +We then use MIGHT to calculate the partial AUC of these sets. +""" + +import numpy as np +from sklearn.datasets import make_blobs +from sklearn.model_selection import cross_val_score +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +from collections import defaultdict +from sktree import MultiViewRandomForestClassifier, RandomForestClassifier + +seed = 12345 +rng = np.random.default_rng(seed) + + +def make_multiview_classification(n_samples=100, n_features_1=5, n_features_2=1000, cluster_std=2.0, seed=None): + rng = np.random.default_rng(seed=seed) + + # Create a high-dimensional multiview dataset with a low-dimensional informative + # subspace in one view of the dataset. + X0_first, y0 = make_blobs( + n_samples=n_samples, + cluster_std=cluster_std, + n_features=n_features_1, + random_state=rng.integers(1, 10000), + centers=1, + ) + + X1_first, y1 = make_blobs( + n_samples=n_samples, + cluster_std=cluster_std, + n_features=n_features_1, + random_state=rng.integers(1, 10000), + centers=1, + ) + y1[:] = 1 + X0 = np.concatenate([X0_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1) + X1 = np.concatenate([X1_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1) + X = np.vstack((X0, X1)) + y = np.hstack((y0, y1)).T + + return X, y + +# %% +# Simulate data +# ------------- +# We simulate a 2-view dataset with both views containing informative low-dimensional features. +# The first view has five dimensions, while the second view will vary from five to a thousand +# dimensions. The sample-size will be kept fixed, so we can compare the performance of +# regular Random forests with Multi-view Random Forests. + +n_samples = 1000 +n_features_views = np.linspace(5, 1000, 2).astype(int) + +datasets = [] +for idx, n_features in enumerate(n_features_views): + X, y = make_multiview_classification(n_samples=n_samples, n_features_1=5, n_features_2=n_features, cluster_std=2.0, seed=seed + idx) + datasets.append((X, y)) + +# %% +# Fit Random Forest and Multi-view Random Forest +# ---------------------------------------------- +# Here, we fit both forests over all the datasets. 
+ +n_estimators = 50 +n_jobs = -1 + +scores = defaultdict(list) + +for ((X, y), n_features) in zip(datasets, n_features_views): + feature_set_ends = np.array([5, n_features + 5]) + + rf = RandomForestClassifier( + n_estimators=n_estimators, + n_jobs=n_jobs, + ) + + mvrf = MultiViewRandomForestClassifier( + n_estimators=n_estimators, + n_jobs=n_jobs, + feature_set_ends=n_features_views, + ) + + # obtain the cross-validation score + rf_score = cross_val_score(rf, X, y, cv=2).mean() + mvrf_score = cross_val_score(mvrf, X, y, cv=2).mean() + + scores['rf'].append(rf_score) + scores['mvrf'].append(mvrf_score) + +# %% +# Visualize scores and compare performance +# ---------------------------------------- +# Now, we can compare the performance from the cross-validation experiment. + +scores['n_features'] = n_features_views +df = pd.DataFrame(scores) + +# melt the dataframe, to make it easier to plot +df = pd.melt(df, id_vars=['n_features'], var_name='model', value_name='score') + +fig, ax = plt.subplots() +sns.lineplot(data=df, x='n_features', y='score', hue='model', label='CV Score', ax=ax) +ax.set_ylabel('CV Score') +ax.set_xlabel('Number of features in second view') +ax.set_title('Random Forest vs Multi-view Random Forest') +plt.show() From fdd174f17ed6a682c5daf332513f6139c897e5b8 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 12 Oct 2023 15:36:34 -0400 Subject: [PATCH 4/9] Merging main Signed-off-by: Adam Li --- .github/workflows/build_wheels.yml | 2 +- .github/workflows/main.yml | 2 +- build_requirements.txt | 4 +- pyproject.toml | 2 +- requirements.txt | 2 +- sktree/__init__.py | 1 + sktree/_lib/meson.build | 34 +- sktree/_lib/sklearn_fork | 2 +- sktree/tree/_marginal.pxd | 11 +- sktree/tree/_marginal.pyx | 56 ++-- sktree/tree/_oblique_splitter.pxd | 122 ++++---- sktree/tree/_oblique_splitter.pyx | 296 +++++++++--------- sktree/tree/_oblique_tree.pxd | 24 +- sktree/tree/_oblique_tree.pyx | 68 ++-- sktree/tree/_sklearn_splitter.pxd | 8 +- sktree/tree/_sklearn_splitter.pyx | 122 ++++---- sktree/tree/_utils.pxd | 19 +- sktree/tree/_utils.pyx | 52 +-- sktree/tree/manifold/_morf_splitter.pxd | 51 ++- sktree/tree/manifold/_morf_splitter.pyx | 152 ++++----- sktree/tree/unsupervised/_unsup_criterion.pxd | 34 +- sktree/tree/unsupervised/_unsup_criterion.pyx | 130 ++++---- .../unsupervised/_unsup_oblique_splitter.pxd | 71 ++--- .../unsupervised/_unsup_oblique_splitter.pyx | 120 +++---- .../tree/unsupervised/_unsup_oblique_tree.pxd | 26 +- .../tree/unsupervised/_unsup_oblique_tree.pyx | 64 ++-- sktree/tree/unsupervised/_unsup_splitter.pxd | 39 ++- sktree/tree/unsupervised/_unsup_splitter.pyx | 106 +++---- sktree/tree/unsupervised/_unsup_tree.pxd | 36 +-- sktree/tree/unsupervised/_unsup_tree.pyx | 190 +++++------ 30 files changed, 920 insertions(+), 926 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index b2c415c49..63d8436b5 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -44,7 +44,7 @@ jobs: # - [macos-12, macosx_*, arm64] - [windows-2019, win, AMD64] - python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11.0-alpha - 3.11.0"]] + python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]] # python[0] is used to specify the python versions made by cibuildwheel env: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 32298fc7f..70db81873 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -100,7 +100,7 @@ jobs: - name: Setup build and install 
scikit-tree run: | - ./spin build -j 2 --forcesubmodule + ./spin build -j 2 - name: Ccache performance shell: bash -l {0} diff --git a/build_requirements.txt b/build_requirements.txt index 360c66711..3341505a4 100644 --- a/build_requirements.txt +++ b/build_requirements.txt @@ -1,9 +1,9 @@ meson meson-python -cython>=3.0 +cython==0.29.36 ninja numpy -scikit-learn>=1.3 +scikit-learn>=1.3.1 click rich-click doit diff --git a/pyproject.toml b/pyproject.toml index d9d901e32..98f6591d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ include = [ dependencies = [ 'numpy', 'scipy>=1.5.0', - 'scikit-learn>=1.3' + 'scikit-learn>=1.3.1' ] diff --git a/requirements.txt b/requirements.txt index 92f3a6b2b..dc42ea7e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ numpy>=1.25 scipy -scikit-learn>=1.3 +scikit-learn>=1.3.1 diff --git a/sktree/__init__.py b/sktree/__init__.py index 1ca249fc0..b6e906702 100644 --- a/sktree/__init__.py +++ b/sktree/__init__.py @@ -59,6 +59,7 @@ ) from .ensemble._honest_forest import HonestForestClassifier except ImportError as e: + print(e.msg) msg = """Error importing scikit-tree: you cannot import scikit-tree while being in scikit-tree source directory; please exit the scikit-tree source tree first and relaunch your Python interpreter.""" diff --git a/sktree/_lib/meson.build b/sktree/_lib/meson.build index ffc70d72c..10dade643 100644 --- a/sktree/_lib/meson.build +++ b/sktree/_lib/meson.build @@ -42,4 +42,36 @@ foreach py_source: python_sources './sklearn/ensemble/' + py_source, subdir: 'sktree/_lib/sklearn/ensemble' ) -endforeach \ No newline at end of file +endforeach + +# TODO: Can remove if included in scikit-learn eventually +# install tree/ submodule +extensions = [ + '_quad_tree', +] + +foreach ext: extensions + py3.extension_module(ext, + cython_gen_cpp.process('./sklearn/neighbors/' + ext + '.pyx'), + c_args: cython_c_args, + include_directories: [incdir_numpy,], + install: true, + subdir: 'sktree/_lib/sklearn/neighbors/', + ) +endforeach + +# install tree/ submodule +extensions = [ + '_typedefs', + '_random', +] + +foreach ext: extensions + py3.extension_module(ext, + cython_gen_cpp.process('./sklearn/utils/' + ext + '.pyx'), + c_args: cython_c_args, + include_directories: [incdir_numpy,], + install: true, + subdir: 'sktree/_lib/sklearn/utils/', + ) +endforeach diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork index 01d26303a..6c7a5f44e 160000 --- a/sktree/_lib/sklearn_fork +++ b/sktree/_lib/sklearn_fork @@ -1 +1 @@ -Subproject commit 01d26303ae77ddb8d25cef14feb4be7cd03111f6 +Subproject commit 6c7a5f44eb4ec3bea5dd6a9e4d5db748d12b209e diff --git a/sktree/tree/_marginal.pxd b/sktree/tree/_marginal.pxd index 1b65d60b0..cddeed633 100644 --- a/sktree/tree/_marginal.pxd +++ b/sktree/tree/_marginal.pxd @@ -2,19 +2,16 @@ import numpy as np cimport numpy as cnp -from .._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from .._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from .._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from .._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from .._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer from .._lib.sklearn.tree._tree cimport BaseTree, Node +from .._lib.sklearn.tree._utils cimport UINT32_t +from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t cpdef apply_marginal_tree( BaseTree tree, object X, - const SIZE_t[:] marginal_indices, - int traversal_method, + const intp_t[:] 
marginal_indices, + intp_t traversal_method, unsigned char use_sample_weight, object random_state ) diff --git a/sktree/tree/_marginal.pyx b/sktree/tree/_marginal.pyx index 04f167b8d..043ee3263 100644 --- a/sktree/tree/_marginal.pyx +++ b/sktree/tree/_marginal.pyx @@ -18,15 +18,15 @@ from numpy import float32 as DTYPE TREE_LEAF = -1 TREE_UNDEFINED = -2 -cdef SIZE_t _TREE_LEAF = TREE_LEAF -cdef SIZE_t _TREE_UNDEFINED = TREE_UNDEFINED +cdef intp_t _TREE_LEAF = TREE_LEAF +cdef intp_t _TREE_UNDEFINED = TREE_UNDEFINED cpdef apply_marginal_tree( BaseTree tree, object X, - const SIZE_t[:] marginal_indices, - int traversal_method, + const intp_t[:] marginal_indices, + intp_t traversal_method, unsigned char use_sample_weight, object random_state ): @@ -41,7 +41,7 @@ cpdef apply_marginal_tree( marginal_indices : ndarray of shape (n_marginals,) The indices of the features to marginalize, which are columns in ``X``. - traversal_method : int + traversal_method : intp_t The traversal method to use. 0 for 'random', 1 for 'weighted'. use_sample_weight : unsigned char @@ -62,13 +62,13 @@ cpdef apply_marginal_tree( if X.dtype != DTYPE: raise ValueError("X.dtype should be np.float32, got %s" % X.dtype) - cdef SIZE_t n_marginals = marginal_indices.shape[0] + cdef intp_t n_marginals = marginal_indices.shape[0] # sklearn_rand_r random number state cdef UINT32_t rand_r_state = random_state.randint(0, RAND_R_MAX) # define a set of all marginal indices - cdef unordered_set[SIZE_t] marginal_indices_map + cdef unordered_set[intp_t] marginal_indices_map # check all marginal indices are valid, and also convert to an unordered map for i in range(n_marginals): @@ -94,19 +94,19 @@ cpdef apply_marginal_tree( cdef void _resample_split_node( BaseTree tree, Node* node, - unordered_set[SIZE_t] marginal_indices_map, - const DTYPE_t[:, :] X, - const DOUBLE_t[:, ::1] y, - const DOUBLE_t[:] sample_weight, + unordered_set[intp_t] marginal_indices_map, + const float32_t[:, :] X, + const float64_t[:, ::1] y, + const float64_t[:] sample_weight, ) noexcept nogil: pass cdef inline cnp.ndarray _apply_dense_marginal( BaseTree tree, - const DTYPE_t[:, :] X, - unordered_set[SIZE_t] marginal_indices_map, - int traversal_method, + const float32_t[:, :] X, + unordered_set[intp_t] marginal_indices_map, + intp_t traversal_method, unsigned char use_sample_weight, UINT32_t* rand_r_state ): @@ -122,10 +122,10 @@ cdef inline cnp.ndarray _apply_dense_marginal( The tree to apply. X : const ndarray of shape (n_samples, n_features) The data matrix. - marginal_indices_map : unordered_set[SIZE_t] + marginal_indices_map : unordered_set[intp_t] The indices of the features to marginalize, which are columns in ``X``. - traversal_method : int + traversal_method : intp_t The traversal method to use. 0 for 'random', 1 for 'weighted'. use_sample_weight : unsigned char @@ -135,20 +135,20 @@ cdef inline cnp.ndarray _apply_dense_marginal( The random number state. 
""" # Extract input - cdef const DTYPE_t[:, :] X_ndarray = X - cdef SIZE_t n_samples = X.shape[0] - cdef DTYPE_t X_i_node_feature + cdef const float32_t[:, :] X_ndarray = X + cdef intp_t n_samples = X.shape[0] + cdef float32_t X_i_node_feature - cdef DTYPE_t n_node_samples, n_right_samples, n_left_samples - cdef double p_left - cdef int is_left + cdef float32_t n_node_samples, n_right_samples, n_left_samples + cdef float64_t p_left + cdef intp_t is_left # Initialize output - cdef SIZE_t[:] out = np.zeros(n_samples, dtype=np.intp) + cdef intp_t[:] out = np.zeros(n_samples, dtype=np.intp) # Initialize auxiliary data-structure cdef Node* node = NULL - cdef SIZE_t i = 0 + cdef intp_t i = 0 with nogil: for i in prange(n_samples): @@ -172,7 +172,7 @@ cdef inline cnp.ndarray _apply_dense_marginal( n_right_samples = tree.nodes[node.right_child].n_node_samples # compute the probabilies for going left and right - p_left = (n_left_samples / n_node_samples) + p_left = (n_left_samples / n_node_samples) # randomly sample a direction is_left = rand_weighted_binary(p_left, rand_r_state) @@ -202,14 +202,14 @@ cdef inline cnp.ndarray _apply_dense_marginal( else: node = &tree.nodes[node.right_child] - out[i] = (node - tree.nodes) # node offset + out[i] = (node - tree.nodes) # node offset return np.asarray(out) -cdef inline int is_element_present(unordered_set[SIZE_t]& my_set, SIZE_t element) noexcept nogil: +cdef inline intp_t is_element_present(unordered_set[intp_t]& my_set, intp_t element) noexcept nogil: """Helper function to check presence of element in set.""" - cdef unordered_set[SIZE_t].iterator it = my_set.find(element) + cdef unordered_set[intp_t].iterator it = my_set.find(element) if it != my_set.end(): return 1 diff --git a/sktree/tree/_oblique_splitter.pxd b/sktree/tree/_oblique_splitter.pxd index 3f7b84e43..6da9ac2a7 100644 --- a/sktree/tree/_oblique_splitter.pxd +++ b/sktree/tree/_oblique_splitter.pxd @@ -15,28 +15,24 @@ from libcpp.vector cimport vector from .._lib.sklearn.tree._criterion cimport Criterion from .._lib.sklearn.tree._splitter cimport SplitRecord, Splitter -from .._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from .._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from .._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from .._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from .._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer -from .._lib.sklearn.utils._typedefs cimport intp_t +from .._lib.sklearn.tree._utils cimport UINT32_t +from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t from ._sklearn_splitter cimport sort cdef struct ObliqueSplitRecord: # Data to track sample split - SIZE_t feature # Which feature to split on. - SIZE_t pos # Split samples array at the given position, + intp_t feature # Which feature to split on. + intp_t pos # Split samples array at the given position, # # i.e. count of samples below threshold for feature. # # pos is >= end if the node is a leaf. - double threshold # Threshold to split at. - double improvement # Impurity improvement given parent node. - double impurity_left # Impurity of the left split. - double impurity_right # Impurity of the right split. + float64_t threshold # Threshold to split at. + float64_t improvement # Impurity improvement given parent node. + float64_t impurity_left # Impurity of the left split. + float64_t impurity_right # Impurity of the right split. 
- vector[DTYPE_t]* proj_vec_weights # weights of the vector (max_features,) - vector[SIZE_t]* proj_vec_indices # indices of the features (max_features,) + vector[float32_t]* proj_vec_weights # weights of the vector (max_features,) + vector[intp_t]* proj_vec_indices # indices of the features (max_features,) cdef class BaseObliqueSplitter(Splitter): @@ -44,55 +40,55 @@ cdef class BaseObliqueSplitter(Splitter): # # Oblique Splitting extra parameters (mtry, n_dims) matrix - cdef vector[vector[DTYPE_t]] proj_mat_weights # nonzero weights of sparse proj_mat matrix - cdef vector[vector[SIZE_t]] proj_mat_indices # nonzero indices of sparse proj_mat matrix + cdef vector[vector[float32_t]] proj_mat_weights # nonzero weights of sparse proj_mat matrix + cdef vector[vector[intp_t]] proj_mat_indices # nonzero indices of sparse proj_mat matrix # TODO: assumes all oblique splitters only work with dense data - cdef const DTYPE_t[:, :] X + cdef const float32_t[:, :] X # feature weights across (n_dims,) - cdef DTYPE_t[:] feature_weights + cdef float32_t[:] feature_weights # All oblique splitters (i.e. non-axis aligned splitters) require a # function to sample a projection matrix that is applied to the feature matrix # to quickly obtain the sampled projections for candidate splits. cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil # Redefined here since the new logic requires calling sample_proj_mat - cdef int node_reset( + cdef intp_t node_reset( self, - SIZE_t start, - SIZE_t end, - double* weighted_n_node_samples + intp_t start, + intp_t end, + float64_t* weighted_n_node_samples ) except -1 nogil cdef void compute_features_over_samples( self, - SIZE_t start, - SIZE_t end, - const SIZE_t[:] samples, - DTYPE_t[:] feature_values, - vector[DTYPE_t]* proj_vec_weights, # weights of the vector (max_features,) - vector[SIZE_t]* proj_vec_indices # indices of the features (max_features,) + intp_t start, + intp_t end, + const intp_t[:] samples, + float32_t[:] feature_values, + vector[float32_t]* proj_vec_weights, # weights of the vector (max_features,) + vector[intp_t]* proj_vec_indices # indices of the features (max_features,) ) noexcept nogil - cdef int node_split( + cdef intp_t node_split( self, - double impurity, # Impurity of the node + float64_t impurity, # Impurity of the node SplitRecord* split, - SIZE_t* n_constant_features, - double lower_bound, - double upper_bound, + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound, ) except -1 nogil cdef inline void fisher_yates_shuffle_memview( self, - SIZE_t[::1] indices_to_sample, - SIZE_t grid_size, + intp_t[::1] indices_to_sample, + intp_t grid_size, UINT32_t* random_state ) noexcept nogil @@ -101,51 +97,51 @@ cdef class ObliqueSplitter(BaseObliqueSplitter): # to split the samples samples[start:end]. # Oblique Splitting extra parameters - cdef public double feature_combinations # Number of features to combine - cdef SIZE_t n_non_zeros # Number of non-zero features - cdef SIZE_t[::1] indices_to_sample # an array of indices to sample of size mtry X n_features + cdef public float64_t feature_combinations # Number of features to combine + cdef intp_t n_non_zeros # Number of non-zero features + cdef intp_t[::1] indices_to_sample # an array of indices to sample of size mtry X n_features # All oblique splitters (i.e. 
non-axis aligned splitters) require a # function to sample a projection matrix that is applied to the feature matrix # to quickly obtain the sampled projections for candidate splits. cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil cdef class BestObliqueSplitter(ObliqueSplitter): - cdef int node_split( + cdef intp_t node_split( self, - double impurity, # Impurity of the node + float64_t impurity, # Impurity of the node SplitRecord* split, - SIZE_t* n_constant_features, - double lower_bound, - double upper_bound, + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound, ) except -1 nogil cdef class RandomObliqueSplitter(ObliqueSplitter): cdef void find_min_max( self, - DTYPE_t[::1] feature_values, - DTYPE_t* min_feature_value_out, - DTYPE_t* max_feature_value_out, + float32_t[::1] feature_values, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, ) noexcept nogil - cdef SIZE_t partition_samples( + cdef intp_t partition_samples( self, - double current_threshold + float64_t current_threshold ) noexcept nogil - cdef int node_split( + cdef intp_t node_split( self, - double impurity, # Impurity of the node + float64_t impurity, # Impurity of the node SplitRecord* split, - SIZE_t* n_constant_features, - double lower_bound, - double upper_bound, + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound, ) except -1 nogil @@ -154,12 +150,12 @@ cdef class MultiViewSplitter(BestObliqueSplitter): cdef const intp_t[:] feature_set_ends # an array indicating the column indices of the end of each feature set cdef intp_t n_feature_sets # the number of feature sets is the length of feature_set_ends + 1 - cdef vector[vector[SIZE_t]] multi_indices_to_sample + cdef vector[vector[intp_t]] multi_indices_to_sample cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil @@ -172,10 +168,10 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter): # if True, then sample from each feature set for each projection vector cdef bint uniform_sampling - cdef vector[vector[SIZE_t]] multi_indices_to_sample + cdef vector[vector[intp_t]] multi_indices_to_sample cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx index 92aa612a7..1dfb256c3 100644 --- a/sktree/tree/_oblique_splitter.pyx +++ b/sktree/tree/_oblique_splitter.pyx @@ -13,17 +13,17 @@ from sklearn.tree._utils cimport rand_int, rand_uniform from .._lib.sklearn.tree._criterion cimport Criterion -cdef double INFINITY = np.inf +cdef float64_t INFINITY = np.inf # Mitigate precision differences between 32 bit and 64 bit -cdef DTYPE_t FEATURE_THRESHOLD = 1e-7 +cdef float32_t FEATURE_THRESHOLD = 1e-7 # Constant to switch between algorithm non zero value extract algorithm # in SparseSplitter -cdef DTYPE_t EXTRACT_NNZ_SWITCH = 0.1 +cdef float32_t EXTRACT_NNZ_SWITCH = 0.1 -cdef inline void _init_split(ObliqueSplitRecord* self, SIZE_t start_pos) noexcept nogil: +cdef inline void _init_split(ObliqueSplitRecord* 
self, intp_t start_pos) noexcept nogil: self.impurity_left = INFINITY self.impurity_right = INFINITY self.pos = start_pos @@ -44,8 +44,8 @@ cdef class BaseObliqueSplitter(Splitter): def __setstate__(self, d): pass - cdef int node_reset(self, SIZE_t start, SIZE_t end, - double* weighted_n_node_samples) except -1 nogil: + cdef intp_t node_reset(self, intp_t start, intp_t end, + float64_t* weighted_n_node_samples) except -1 nogil: """Reset splitter on node samples[start:end]. Returns -1 in case of failure to allocate memory (and raise MemoryError) @@ -53,11 +53,11 @@ cdef class BaseObliqueSplitter(Splitter): Parameters ---------- - start : SIZE_t + start : intp_t The index of the first sample to consider - end : SIZE_t + end : intp_t The index of the last sample to consider - weighted_n_node_samples : ndarray, dtype=double pointer + weighted_n_node_samples : ndarray, dtype=float64_t pointer The total weight of those samples """ @@ -80,8 +80,8 @@ cdef class BaseObliqueSplitter(Splitter): cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil: """ Sample the projection vector. @@ -89,28 +89,28 @@ cdef class BaseObliqueSplitter(Splitter): """ pass - cdef int pointer_size(self) noexcept nogil: + cdef intp_t pointer_size(self) noexcept nogil: """Get size of a pointer to record for ObliqueSplitter.""" return sizeof(ObliqueSplitRecord) cdef inline void compute_features_over_samples( self, - SIZE_t start, - SIZE_t end, - const SIZE_t[:] samples, - DTYPE_t[:] feature_values, - vector[DTYPE_t]* proj_vec_weights, # weights of the vector (max_features,) - vector[SIZE_t]* proj_vec_indices # indices of the features (max_features,) + intp_t start, + intp_t end, + const intp_t[:] samples, + float32_t[:] feature_values, + vector[float32_t]* proj_vec_weights, # weights of the vector (max_features,) + vector[intp_t]* proj_vec_indices # indices of the features (max_features,) ) noexcept nogil: """Compute the feature values for the samples[start:end] range. Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ - cdef SIZE_t idx, jdx - cdef SIZE_t col_idx - cdef DTYPE_t col_weight + cdef intp_t idx, jdx + cdef intp_t col_idx + cdef float32_t col_weight # Compute linear combination of features and then # sort samples according to the feature values. @@ -126,11 +126,11 @@ cdef class BaseObliqueSplitter(Splitter): cdef inline void fisher_yates_shuffle_memview( self, - SIZE_t[::1] indices_to_sample, - SIZE_t grid_size, + intp_t[::1] indices_to_sample, + intp_t grid_size, UINT32_t* random_state, ) noexcept nogil: - cdef int i, j + cdef intp_t i, j # XXX: should this be `i` or `i+1`? for valid Fisher-Yates? for i in range(0, grid_size - 1): @@ -142,12 +142,12 @@ cdef class ObliqueSplitter(BaseObliqueSplitter): def __cinit__( self, Criterion criterion, - SIZE_t max_features, - SIZE_t min_samples_leaf, - double min_weight_leaf, + intp_t max_features, + intp_t min_samples_leaf, + float64_t min_weight_leaf, object random_state, const cnp.int8_t[:] monotonic_cst, - double feature_combinations, + float64_t feature_combinations, *argv ): """ @@ -156,20 +156,20 @@ cdef class ObliqueSplitter(BaseObliqueSplitter): criterion : Criterion The criterion to measure the quality of a split. - max_features : SIZE_t + max_features : intp_t The maximal number of randomly selected features which can be considered for a split. 
- min_samples_leaf : SIZE_t + min_samples_leaf : intp_t The minimal number of samples each leaf can have, where splits which would result in having less samples in a leaf are not considered. - min_weight_leaf : double + min_weight_leaf : float64_t The minimal weight each leaf can have, where the weight is the sum of the weights of each sample in it. - feature_combinations : double + feature_combinations : float64_t The average number of features to combine in an oblique split. Each feature is independently included with probability ``feature_combination`` / ``n_features``. @@ -190,20 +190,20 @@ cdef class ObliqueSplitter(BaseObliqueSplitter): self.monotonic_cst = monotonic_cst # Sparse max_features x n_features projection matrix - self.proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) - self.proj_mat_indices = vector[vector[SIZE_t]](self.max_features) + self.proj_mat_weights = vector[vector[float32_t]](self.max_features) + self.proj_mat_indices = vector[vector[intp_t]](self.max_features) # Oblique tree parameters self.feature_combinations = feature_combinations # or max w/ 1... - self.n_non_zeros = max(int(self.max_features * self.feature_combinations), 1) + self.n_non_zeros = max((self.max_features * self.feature_combinations), 1) - cdef int init( + cdef intp_t init( self, object X, - const DOUBLE_t[:, ::1] y, - const DOUBLE_t[:] sample_weight, + const float64_t[:, ::1] y, + const float64_t[:] sample_weight, const unsigned char[::1] missing_values_in_feature_mask, ) except -1: Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) @@ -215,13 +215,13 @@ cdef class ObliqueSplitter(BaseObliqueSplitter): dtype=np.intp) # XXX: Just to initialize stuff - # self.feature_weights = np.ones((self.n_features,), dtype=DTYPE_t) / self.n_features + # self.feature_weights = np.ones((self.n_features,), dtype=float32_t) / self.n_features return 0 cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil: """Sample oblique projection matrix. @@ -243,16 +243,16 @@ cdef class ObliqueSplitter(BaseObliqueSplitter): it is assumed ``feature_combinations`` is forced to be smaller than ``n_features`` before instantiating an oblique splitter. 
""" - cdef SIZE_t n_features = self.n_features - cdef SIZE_t n_non_zeros = self.n_non_zeros + cdef intp_t n_features = self.n_features + cdef intp_t n_non_zeros = self.n_non_zeros cdef UINT32_t* random_state = &self.rand_r_state - cdef int i, feat_i, proj_i, rand_vec_index - cdef DTYPE_t weight + cdef intp_t i, feat_i, proj_i, rand_vec_index + cdef float32_t weight # construct an array to sample from mTry x n_features set of indices - cdef SIZE_t[::1] indices_to_sample = self.indices_to_sample - cdef SIZE_t grid_size = self.max_features * self.n_features + cdef intp_t[::1] indices_to_sample = self.indices_to_sample + cdef intp_t grid_size = self.max_features * self.n_features # shuffle indices over the 2D grid to sample using Fisher-Yates self.fisher_yates_shuffle_memview(indices_to_sample, grid_size, random_state) @@ -288,13 +288,13 @@ cdef class BestObliqueSplitter(ObliqueSplitter): self.feature_combinations, ), self.__getstate__()) - cdef int node_split( + cdef intp_t node_split( self, - double impurity, + float64_t impurity, SplitRecord* split, - SIZE_t* n_constant_features, - double lower_bound, - double upper_bound, + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound, ) except -1 nogil: """Find the best_split split on node samples[start:end] @@ -305,24 +305,24 @@ cdef class BestObliqueSplitter(ObliqueSplitter): cdef ObliqueSplitRecord* oblique_split = (split) # Draw random splits and pick the best_split - cdef SIZE_t[::1] samples = self.samples - cdef SIZE_t start = self.start - cdef SIZE_t end = self.end + cdef intp_t[::1] samples = self.samples + cdef intp_t start = self.start + cdef intp_t end = self.end # pointer array to store feature values to split on - cdef DTYPE_t[::1] feature_values = self.feature_values - cdef SIZE_t max_features = self.max_features - cdef SIZE_t min_samples_leaf = self.min_samples_leaf + cdef float32_t[::1] feature_values = self.feature_values + cdef intp_t max_features = self.max_features + cdef intp_t min_samples_leaf = self.min_samples_leaf # keep track of split record for current_split node and the best_split split # found among the sampled projection vectors cdef ObliqueSplitRecord best_split, current_split - cdef double current_proxy_improvement = -INFINITY - cdef double best_proxy_improvement = -INFINITY + cdef float64_t current_proxy_improvement = -INFINITY + cdef float64_t best_proxy_improvement = -INFINITY - cdef SIZE_t feat_i, p # index over computed features and start/end - cdef SIZE_t partition_end - cdef DTYPE_t temp_d # to compute a projection feature value + cdef intp_t feat_i, p # index over computed features and start/end + cdef intp_t partition_end + cdef float32_t temp_d # to compute a projection feature value # instantiate the split records _init_split(&best_split, end) @@ -449,18 +449,18 @@ cdef class RandomObliqueSplitter(ObliqueSplitter): cdef inline void find_min_max( self, - DTYPE_t[::1] feature_values, - DTYPE_t* min_feature_value_out, - DTYPE_t* max_feature_value_out, + float32_t[::1] feature_values, + float32_t* min_feature_value_out, + float32_t* max_feature_value_out, ) noexcept nogil: """Find the minimum and maximum value for current_feature.""" cdef: - DTYPE_t current_feature_value - DTYPE_t min_feature_value = INFINITY - DTYPE_t max_feature_value = -INFINITY - SIZE_t start = self.start - SIZE_t end = self.end - SIZE_t p + float32_t current_feature_value + float32_t min_feature_value = INFINITY + float32_t max_feature_value = -INFINITY + intp_t start = self.start + intp_t end = self.end + intp_t 
p for p in range(start, end): current_feature_value = feature_values[p] @@ -472,13 +472,13 @@ cdef class RandomObliqueSplitter(ObliqueSplitter): min_feature_value_out[0] = min_feature_value max_feature_value_out[0] = max_feature_value - cdef inline SIZE_t partition_samples(self, double current_threshold) noexcept nogil: + cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil: """Partition samples for feature_values at the current_threshold.""" cdef: - SIZE_t p = self.start - SIZE_t partition_end = self.end - SIZE_t[::1] samples = self.samples - DTYPE_t[::1] feature_values = self.feature_values + intp_t p = self.start + intp_t partition_end = self.end + intp_t[::1] samples = self.samples + float32_t[::1] feature_values = self.feature_values while p < partition_end: if feature_values[p] <= current_threshold: @@ -494,13 +494,13 @@ cdef class RandomObliqueSplitter(ObliqueSplitter): return partition_end # overwrite the node_split method with random threshold selection - cdef int node_split( + cdef intp_t node_split( self, - double impurity, + float64_t impurity, SplitRecord* split, - SIZE_t* n_constant_features, - double lower_bound, - double upper_bound, + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound, ) except -1 nogil: """Find the best_split split on node samples[start:end] @@ -511,36 +511,36 @@ cdef class RandomObliqueSplitter(ObliqueSplitter): cdef ObliqueSplitRecord* oblique_split = (split) # Draw random splits and pick the best_split - cdef SIZE_t[::1] samples = self.samples - cdef SIZE_t start = self.start - cdef SIZE_t end = self.end + cdef intp_t[::1] samples = self.samples + cdef intp_t start = self.start + cdef intp_t end = self.end cdef UINT32_t* random_state = &self.rand_r_state # pointer array to store feature values to split on - cdef DTYPE_t[::1] feature_values = self.feature_values - cdef SIZE_t max_features = self.max_features - cdef SIZE_t min_samples_leaf = self.min_samples_leaf - cdef double min_weight_leaf = self.min_weight_leaf + cdef float32_t[::1] feature_values = self.feature_values + cdef intp_t max_features = self.max_features + cdef intp_t min_samples_leaf = self.min_samples_leaf + cdef float64_t min_weight_leaf = self.min_weight_leaf # keep track of split record for current_split node and the best_split split # found among the sampled projection vectors cdef ObliqueSplitRecord best_split, current_split - cdef double current_proxy_improvement = -INFINITY - cdef double best_proxy_improvement = -INFINITY + cdef float64_t current_proxy_improvement = -INFINITY + cdef float64_t best_proxy_improvement = -INFINITY - cdef SIZE_t p - cdef SIZE_t feat_i - cdef SIZE_t partition_end - cdef DTYPE_t temp_d # to compute a projection feature value - cdef DTYPE_t min_feature_value - cdef DTYPE_t max_feature_value + cdef intp_t p + cdef intp_t feat_i + cdef intp_t partition_end + cdef float32_t temp_d # to compute a projection feature value + cdef float32_t min_feature_value + cdef float32_t max_feature_value # Number of features discovered to be constant during the split search - # cdef SIZE_t n_found_constants = 0 - # cdef SIZE_t n_known_constants = n_constant_features[0] + # cdef intp_t n_found_constants = 0 + # cdef intp_t n_known_constants = n_constant_features[0] # n_total_constants = n_known_constants + n_found_constants - # cdef SIZE_t n_total_constants = n_known_constants - cdef SIZE_t n_visited_features = 0 + # cdef intp_t n_total_constants = n_known_constants + cdef intp_t n_visited_features = 0 # 
instantiate the split records _init_split(&best_split, end) @@ -663,12 +663,12 @@ cdef class MultiViewSplitter(BestObliqueSplitter): def __cinit__( self, Criterion criterion, - SIZE_t max_features, - SIZE_t min_samples_leaf, - double min_weight_leaf, + intp_t max_features, + intp_t min_samples_leaf, + float64_t min_weight_leaf, object random_state, const cnp.int8_t[:] monotonic_cst, - double feature_combinations, + float64_t feature_combinations, const intp_t[:] feature_set_ends, intp_t n_feature_sets, *argv @@ -699,11 +699,11 @@ cdef class MultiViewSplitter(BestObliqueSplitter): self.n_feature_sets, ), self.__getstate__()) - cdef int init( + cdef intp_t init( self, object X, - const DOUBLE_t[:, ::1] y, - const DOUBLE_t[:] sample_weight, + const float64_t[:, ::1] y, + const float64_t[:] sample_weight, const unsigned char[::1] missing_values_in_feature_mask, ) except -1: Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) @@ -711,11 +711,11 @@ cdef class MultiViewSplitter(BestObliqueSplitter): self.X = X # create a helper array for allowing efficient Fisher-Yates - self.multi_indices_to_sample = vector[vector[SIZE_t]](self.n_feature_sets) + self.multi_indices_to_sample = vector[vector[intp_t]](self.n_feature_sets) - cdef SIZE_t i_feature = 0 - cdef SIZE_t feature_set_begin = 0 - cdef SIZE_t size_of_feature_set + cdef intp_t i_feature = 0 + cdef intp_t feature_set_begin = 0 + cdef intp_t size_of_feature_set cdef intp_t ifeat = 0 for i_feature in range(self.n_feature_sets): size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin @@ -727,8 +727,8 @@ cdef class MultiViewSplitter(BestObliqueSplitter): cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil: """Sample projection matrix accounting for multi-views. @@ -736,19 +736,19 @@ cdef class MultiViewSplitter(BestObliqueSplitter): but now also uniformly samples features from each feature set. 
""" cdef UINT32_t* random_state = &self.rand_r_state - cdef int feat_i, proj_i - cdef DTYPE_t weight + cdef intp_t feat_i, proj_i + cdef float32_t weight # keep track of the beginning and ending indices of each feature set - cdef int idx - cdef int ifeature = 0 - cdef int grid_size + cdef intp_t idx + cdef intp_t ifeature = 0 + cdef intp_t grid_size # 03: Algorithm samples features from each set with equal probability proj_i = 0 cdef bint finished_feature_set = False - cdef int i, j + cdef intp_t i, j while proj_i < self.max_features and not finished_feature_set: # sample from a feature set for idx in range(self.n_feature_sets): @@ -787,12 +787,12 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter): def __cinit__( self, Criterion criterion, - SIZE_t max_features, - SIZE_t min_samples_leaf, - double min_weight_leaf, + intp_t max_features, + intp_t min_samples_leaf, + float64_t min_weight_leaf, object random_state, const cnp.int8_t[:] monotonic_cst, - double feature_combinations, + float64_t feature_combinations, const intp_t[:] feature_set_ends, intp_t n_feature_sets, bint uniform_sampling, @@ -820,11 +820,11 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter): self.uniform_sampling, ), self.__getstate__()) - cdef int init( + cdef intp_t init( self, object X, - const DOUBLE_t[:, ::1] y, - const DOUBLE_t[:] sample_weight, + const float64_t[:, ::1] y, + const float64_t[:] sample_weight, const unsigned char[::1] missing_values_in_feature_mask, ) except -1: Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) @@ -832,11 +832,11 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter): self.X = X # create a helper array for allowing efficient Fisher-Yates - self.multi_indices_to_sample = vector[vector[SIZE_t]](self.n_feature_sets) + self.multi_indices_to_sample = vector[vector[intp_t]](self.n_feature_sets) - cdef SIZE_t i_feature = 0 - cdef SIZE_t feature_set_begin = 0 - cdef SIZE_t size_of_feature_set + cdef intp_t i_feature = 0 + cdef intp_t feature_set_begin = 0 + cdef intp_t size_of_feature_set cdef intp_t ifeat = 0 cdef intp_t iproj = 0 while iproj < self.max_features: @@ -856,34 +856,34 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter): cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil: """Sample projection matrix accounting for multi-views. This proceeds as a normal sampling projection matrix, but now also uniformly samples features from each feature set. 
""" - cdef SIZE_t n_features = self.n_features - cdef SIZE_t n_non_zeros = self.n_non_zeros + cdef intp_t n_features = self.n_features + cdef intp_t n_non_zeros = self.n_non_zeros cdef UINT32_t* random_state = &self.rand_r_state - cdef int i, j, feat_i, proj_i, rand_vec_index - cdef DTYPE_t weight + cdef intp_t i, j, feat_i, proj_i, rand_vec_index + cdef float32_t weight # construct an array to sample from mTry x n_features set of indices - cdef vector[SIZE_t] indices_to_sample - cdef SIZE_t grid_size + cdef vector[intp_t] indices_to_sample + cdef intp_t grid_size # compute the number of features in each feature set - cdef SIZE_t n_features_in_set + cdef intp_t n_features_in_set # keep track of the beginning and ending indices of each feature set - cdef int feature_set_begin, feature_set_end, idx + cdef intp_t feature_set_begin, feature_set_end, idx feature_set_begin = 0 # keep track of number of features sampled relative to n_non_zeros - cdef int ifeature = 0 + cdef intp_t ifeature = 0 if self.uniform_sampling: # 01: This algorithm samples features from each feature set uniformly and combines them @@ -973,9 +973,9 @@ cdef class MultiViewSplitterTester(MultiViewSplitter): Returns projection matrix of shape (max_features, n_features). """ - cdef vector[vector[DTYPE_t]] proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) - cdef vector[vector[SIZE_t]] proj_mat_indices = vector[vector[SIZE_t]](self.max_features) - cdef SIZE_t i, j + cdef vector[vector[float32_t]] proj_mat_weights = vector[vector[float32_t]](self.max_features) + cdef vector[vector[intp_t]] proj_mat_indices = vector[vector[intp_t]](self.max_features) + cdef intp_t i, j # sample projection matrix in C/C++ self.sample_proj_mat(proj_mat_weights, proj_mat_indices) @@ -1021,9 +1021,9 @@ cdef class BestObliqueSplitterTester(BestObliqueSplitter): Returns projection matrix of shape (max_features, n_features). 
""" - cdef vector[vector[DTYPE_t]] proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) - cdef vector[vector[SIZE_t]] proj_mat_indices = vector[vector[SIZE_t]](self.max_features) - cdef SIZE_t i, j + cdef vector[vector[float32_t]] proj_mat_weights = vector[vector[float32_t]](self.max_features) + cdef vector[vector[intp_t]] proj_mat_indices = vector[vector[intp_t]](self.max_features) + cdef intp_t i, j # sample projection matrix in C/C++ self.sample_proj_mat(proj_mat_weights, proj_mat_indices) diff --git a/sktree/tree/_oblique_tree.pxd b/sktree/tree/_oblique_tree.pxd index 9f4906a4d..db7813946 100644 --- a/sktree/tree/_oblique_tree.pxd +++ b/sktree/tree/_oblique_tree.pxd @@ -14,34 +14,30 @@ cimport numpy as cnp from libcpp.vector cimport vector from .._lib.sklearn.tree._splitter cimport SplitRecord -from .._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from .._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from .._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from .._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from .._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer from .._lib.sklearn.tree._tree cimport Node, Tree, TreeBuilder +from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t from ._oblique_splitter cimport ObliqueSplitRecord cdef class ObliqueTree(Tree): - cdef vector[vector[DTYPE_t]] proj_vec_weights # (capacity, n_features) array of projection vectors - cdef vector[vector[SIZE_t]] proj_vec_indices # (capacity, n_features) array of projection vectors + cdef vector[vector[float32_t]] proj_vec_weights # (capacity, n_features) array of projection vectors + cdef vector[vector[intp_t]] proj_vec_indices # (capacity, n_features) array of projection vectors # overridden methods - cdef int _resize_c( + cdef intp_t _resize_c( self, - SIZE_t capacity=* + intp_t capacity=* ) except -1 nogil - cdef int _set_split_node( + cdef intp_t _set_split_node( self, SplitRecord* split_node, Node *node, - SIZE_t node_id + intp_t node_id ) nogil except -1 - cdef DTYPE_t _compute_feature( + cdef float32_t _compute_feature( self, - const DTYPE_t[:, :] X_ndarray, - SIZE_t sample_index, + const float32_t[:, :] X_ndarray, + intp_t sample_index, Node *node ) noexcept nogil cdef void _compute_feature_importances( diff --git a/sktree/tree/_oblique_tree.pyx b/sktree/tree/_oblique_tree.pyx index f59374236..99a3d6fc0 100644 --- a/sktree/tree/_oblique_tree.pyx +++ b/sktree/tree/_oblique_tree.pyx @@ -29,7 +29,7 @@ cdef Node dummy NODE_DTYPE = np.asarray((&dummy)).dtype # Mitigate precision differences between 32 bit and 64 bit -cdef DTYPE_t FEATURE_THRESHOLD = 1e-7 +cdef float32_t FEATURE_THRESHOLD = 1e-7 # ============================================================================= # ObliqueTree @@ -47,53 +47,53 @@ cdef class ObliqueTree(Tree): Attributes ---------- - node_count : int + node_count : intp_t The number of nodes (internal nodes + leaves) in the tree. - capacity : int + capacity : intp_t The current capacity (i.e., size) of the arrays, which is at least as great as `node_count`. - max_depth : int + max_depth : intp_t The depth of the tree, i.e. the maximum depth of its leaves. - children_left : array of int, shape [node_count] + children_left : array of intp_t, shape [node_count] children_left[i] holds the node id of the left child of node i. For leaves, children_left[i] == TREE_LEAF. Otherwise, children_left[i] > i. This child handles the case where X[:, feature[i]] <= threshold[i]. 
- children_right : array of int, shape [node_count] + children_right : array of intp_t, shape [node_count] children_right[i] holds the node id of the right child of node i. For leaves, children_right[i] == TREE_LEAF. Otherwise, children_right[i] > i. This child handles the case where X[:, feature[i]] > threshold[i]. - feature : array of int, shape [node_count] + feature : array of intp_t, shape [node_count] feature[i] holds the feature to split on, for the internal node i. - threshold : array of double, shape [node_count] + threshold : array of float64_t, shape [node_count] threshold[i] holds the threshold for the internal node i. - value : array of double, shape [node_count, n_outputs, max_n_classes] + value : array of float64_t, shape [node_count, n_outputs, max_n_classes] Contains the constant prediction value of each node. - impurity : array of double, shape [node_count] + impurity : array of float64_t, shape [node_count] impurity[i] holds the impurity (i.e., the value of the splitting criterion) at node i. - n_node_samples : array of int, shape [node_count] + n_node_samples : array of intp_t, shape [node_count] n_node_samples[i] holds the number of training samples reaching node i. - weighted_n_node_samples : array of int, shape [node_count] + weighted_n_node_samples : array of intp_t, shape [node_count] weighted_n_node_samples[i] holds the weighted number of training samples reaching node i. """ def __cinit__( self, - int n_features, - cnp.ndarray[SIZE_t, ndim=1] n_classes, - int n_outputs + intp_t n_features, + cnp.ndarray[intp_t, ndim=1] n_classes, + intp_t n_outputs ): """Constructor.""" # Input/Output layout @@ -105,7 +105,7 @@ cdef class ObliqueTree(Tree): self.max_n_classes = np.max(n_classes) self.value_stride = n_outputs * self.max_n_classes - cdef SIZE_t k + cdef intp_t k for k in range(n_outputs): self.n_classes[k] = n_classes[k] @@ -116,8 +116,8 @@ cdef class ObliqueTree(Tree): self.value = NULL self.nodes = NULL - self.proj_vec_weights = vector[vector[DTYPE_t]](self.capacity) - self.proj_vec_indices = vector[vector[SIZE_t]](self.capacity) + self.proj_vec_weights = vector[vector[float32_t]](self.capacity) + self.proj_vec_indices = vector[vector[intp_t]](self.capacity) def __reduce__(self): """Reduce re-implementation, for pickling.""" @@ -179,7 +179,7 @@ cdef class ObliqueTree(Tree): memcpy(self.nodes, cnp.PyArray_DATA(node_ndarray), self.capacity * sizeof(Node)) memcpy(self.value, cnp.PyArray_DATA(value_ndarray), - self.capacity * self.value_stride * sizeof(double)) + self.capacity * self.value_stride * sizeof(float64_t)) cpdef cnp.ndarray get_projection_matrix(self): """Get the projection matrix of shape (node_count, n_features).""" @@ -191,7 +191,7 @@ cdef class ObliqueTree(Tree): proj_vecs[i, feat] = weight return proj_vecs - cdef int _resize_c(self, SIZE_t capacity=INTPTR_MAX) except -1 nogil: + cdef intp_t _resize_c(self, intp_t capacity=INTPTR_MAX) except -1 nogil: """Guts of _resize. Additionally resizes the projection indices and weights. 
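# A minimal NumPy sketch of the densification that ``get_projection_matrix`` performs
# above: every node stores its oblique split as a sparse pair of
# (proj_vec_indices, proj_vec_weights), and the dense matrix simply scatters each
# weight into the node's row. The helper name and the example inputs below are
# hypothetical and only meant to illustrate the layout, not the package API.
import numpy as np

def densify_projections(proj_vec_indices, proj_vec_weights, n_features):
    """Scatter per-node sparse projections into a (node_count, n_features) array."""
    proj_vecs = np.zeros((len(proj_vec_indices), n_features), dtype=np.float32)
    for i, (indices, weights) in enumerate(zip(proj_vec_indices, proj_vec_weights)):
        for feat, weight in zip(indices, weights):
            proj_vecs[i, feat] = weight
    return proj_vecs

# e.g. a single node splitting on 0.5 * X[:, 1] + 0.5 * X[:, 3] out of 5 features:
# densify_projections([[1, 3]], [[0.5, 0.5]], n_features=5)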
@@ -220,7 +220,7 @@ cdef class ObliqueTree(Tree): # value memory is initialised to 0 to enable classifier argmax if capacity > self.capacity: memset((self.value + self.capacity * self.value_stride), 0, - (capacity - self.capacity) * self.value_stride * sizeof(double)) + (capacity - self.capacity) * self.value_stride * sizeof(float64_t)) # if capacity smaller than node_count, adjust the counter if capacity < self.node_count: @@ -229,7 +229,7 @@ cdef class ObliqueTree(Tree): self.capacity = capacity return 0 - cdef int _set_split_node(self, SplitRecord* split_node, Node *node, SIZE_t node_id) except -1 nogil: + cdef intp_t _set_split_node(self, SplitRecord* split_node, Node *node, intp_t node_id) except -1 nogil: """Set node data. """ # Cython type cast split record into its inherited split record @@ -250,10 +250,10 @@ cdef class ObliqueTree(Tree): ) return 1 - cdef DTYPE_t _compute_feature( + cdef float32_t _compute_feature( self, - const DTYPE_t[:, :] X_ndarray, - SIZE_t sample_index, + const float32_t[:, :] X_ndarray, + intp_t sample_index, Node *node ) noexcept nogil: """Compute feature from a given data matrix, X. @@ -261,15 +261,15 @@ cdef class ObliqueTree(Tree): In oblique-aligned trees, this is the projection of X. In this case, we take a simple linear combination of some columns of X. """ - cdef DTYPE_t proj_feat = 0.0 - cdef DTYPE_t weight = 0.0 - cdef int j = 0 - cdef SIZE_t feature_index + cdef float32_t proj_feat = 0.0 + cdef float32_t weight = 0.0 + cdef intp_t j = 0 + cdef intp_t feature_index # get the index of the node - cdef SIZE_t node_id = node - self.nodes + cdef intp_t node_id = node - self.nodes - # cdef SIZE_t n_projections = proj_vec_indices.size() + # cdef intp_t n_projections = proj_vec_indices.size() # compute projection of the data based on trained tree # proj_vec_weights = self.proj_vec_weights[node_id] # proj_vec_indices = self.proj_vec_indices[node_id] @@ -299,13 +299,13 @@ cdef class ObliqueTree(Tree): cdef Node* right # get the index of the node - cdef SIZE_t node_id = node - self.nodes + cdef intp_t node_id = node - self.nodes left = &nodes[node.left_child] right = &nodes[node.right_child] - cdef int i, feature_index - cdef DTYPE_t weight + cdef intp_t i, feature_index + cdef float32_t weight for i in range(0, self.proj_vec_indices[node_id].size()): feature_index = self.proj_vec_indices[node_id][i] weight = self.proj_vec_weights[node_id][i] diff --git a/sktree/tree/_sklearn_splitter.pxd b/sktree/tree/_sklearn_splitter.pxd index e5db44c4a..6a659998b 100644 --- a/sktree/tree/_sklearn_splitter.pxd +++ b/sktree/tree/_sklearn_splitter.pxd @@ -1,11 +1,7 @@ -from .._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from .._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from .._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from .._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from .._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer +from .._lib.sklearn.utils._typedefs cimport float32_t, int32_t, intp_t # This defines c-importable functions for other cython files # TODO: remove these files when sklearn merges refactor defining these in pxd files # https://github.com/scikit-learn/scikit-learn/pull/25606 -cdef void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) noexcept nogil +cdef void sort(float32_t* Xf, intp_t* samples, intp_t n) noexcept nogil diff --git a/sktree/tree/_sklearn_splitter.pyx b/sktree/tree/_sklearn_splitter.pyx index 529abde12..92c298353 100644 --- 
a/sktree/tree/_sklearn_splitter.pyx +++ b/sktree/tree/_sklearn_splitter.pyx @@ -9,7 +9,7 @@ from sklearn.tree._utils cimport log # Sort n-element arrays pointed to by Xf and samples, simultaneously, # by the values in Xf. Algorithm: Introsort (Musser, SP&E, 1997). -cdef inline void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) noexcept nogil: +cdef inline void sort(float32_t* Xf, intp_t* samples, intp_t n) noexcept nogil: if n == 0: return cdef int maxd = 2 * log(n) @@ -17,20 +17,20 @@ cdef inline void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) noexcept nogil: cdef inline void swap( - DTYPE_t* Xf, - SIZE_t* samples, - SIZE_t i, - SIZE_t j + float32_t* Xf, + intp_t* samples, + intp_t i, + intp_t j ) noexcept nogil: # Helper for sort Xf[i], Xf[j] = Xf[j], Xf[i] samples[i], samples[j] = samples[j], samples[i] -cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) noexcept nogil: +cdef inline float32_t median3(float32_t* Xf, intp_t n) noexcept nogil: # Median of three pivot selection, after Bentley and McIlroy (1993). # Engineering a sort function. SP&E. Requires 8/3 comparisons on average. - cdef DTYPE_t a = Xf[0], b = Xf[n // 2], c = Xf[n - 1] + cdef float32_t a = Xf[0], b = Xf[n // 2], c = Xf[n - 1] if a < b: if b < c: return b @@ -49,10 +49,10 @@ cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) noexcept nogil: # Introsort with median of 3 pivot selection and 3-way partition function # (robust to repeated elements, e.g. lots of zero features). -cdef void introsort(DTYPE_t* Xf, SIZE_t *samples, - SIZE_t n, int maxd) noexcept nogil: - cdef DTYPE_t pivot - cdef SIZE_t i, l, r +cdef void introsort(float32_t* Xf, intp_t *samples, + intp_t n, int maxd) noexcept nogil: + cdef float32_t pivot + cdef intp_t i, l, r while n > 1: if maxd <= 0: # max depth limit exceeded ("gone quadratic") @@ -82,10 +82,10 @@ cdef void introsort(DTYPE_t* Xf, SIZE_t *samples, n -= r -cdef inline void sift_down(DTYPE_t* Xf, SIZE_t* samples, - SIZE_t start, SIZE_t end) noexcept nogil: +cdef inline void sift_down(float32_t* Xf, intp_t* samples, + intp_t start, intp_t end) noexcept nogil: # Restore heap order in Xf[start:end] by moving the max element to start. - cdef SIZE_t child, maxind, root + cdef intp_t child, maxind, root root = start while True: @@ -105,8 +105,8 @@ cdef inline void sift_down(DTYPE_t* Xf, SIZE_t* samples, root = maxind -cdef void heapsort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) noexcept nogil: - cdef SIZE_t start, end +cdef void heapsort(float32_t* Xf, intp_t* samples, intp_t n) noexcept nogil: + cdef intp_t start, end # heapify start = (n - 2) // 2 @@ -125,20 +125,20 @@ cdef void heapsort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) noexcept nogil: end = end - 1 -cdef int compare_SIZE_t(const void* a, const void* b) noexcept nogil: +cdef int compare_intp_t(const void* a, const void* b) noexcept nogil: """Comparison function for sort.""" - return ((a)[0] - (b)[0]) + return ((a)[0] - (b)[0]) -cdef inline void binary_search(INT32_t[::1] sorted_array, - INT32_t start, INT32_t end, - SIZE_t value, SIZE_t* index, - INT32_t* new_start) noexcept nogil: +cdef inline void binary_search(int32_t[::1] sorted_array, + int32_t start, int32_t end, + intp_t value, intp_t* index, + int32_t* new_start) noexcept nogil: """Return the index of value in the sorted array. If not found, return -1. 
new_start is the last pivot + 1 """ - cdef INT32_t pivot + cdef int32_t pivot index[0] = -1 while start < end: pivot = start + (end - start) // 2 @@ -155,25 +155,25 @@ cdef inline void binary_search(INT32_t[::1] sorted_array, new_start[0] = start -cdef inline void extract_nnz_index_to_samples(INT32_t[::1] X_indices, - DTYPE_t[::1] X_data, - INT32_t indptr_start, - INT32_t indptr_end, - SIZE_t[::1] samples, - SIZE_t start, - SIZE_t end, - SIZE_t[::1] index_to_samples, - DTYPE_t[::1] Xf, - SIZE_t* end_negative, - SIZE_t* start_positive) noexcept nogil: +cdef inline void extract_nnz_index_to_samples(int32_t[::1] X_indices, + float32_t[::1] X_data, + int32_t indptr_start, + int32_t indptr_end, + intp_t[::1] samples, + intp_t start, + intp_t end, + intp_t[::1] index_to_samples, + float32_t[::1] Xf, + intp_t* end_negative, + intp_t* start_positive) noexcept nogil: """Extract and partition values for a feature using index_to_samples. Complexity is O(indptr_end - indptr_start). """ - cdef INT32_t k - cdef SIZE_t index - cdef SIZE_t end_negative_ = start - cdef SIZE_t start_positive_ = end + cdef int32_t k + cdef intp_t index + cdef intp_t end_negative_ = start + cdef intp_t start_positive_ = end for k in range(indptr_start, indptr_end): if start <= index_to_samples[X_indices[k]] < end: @@ -194,18 +194,18 @@ cdef inline void extract_nnz_index_to_samples(INT32_t[::1] X_indices, start_positive[0] = start_positive_ -cdef inline void extract_nnz_binary_search(INT32_t[::1] X_indices, - DTYPE_t[::1] X_data, - INT32_t indptr_start, - INT32_t indptr_end, - SIZE_t[::1] samples, - SIZE_t start, - SIZE_t end, - SIZE_t[::1] index_to_samples, - DTYPE_t[::1] Xf, - SIZE_t* end_negative, - SIZE_t* start_positive, - SIZE_t[::1] sorted_samples, +cdef inline void extract_nnz_binary_search(int32_t[::1] X_indices, + float32_t[::1] X_data, + int32_t indptr_start, + int32_t indptr_end, + intp_t[::1] samples, + intp_t start, + intp_t end, + intp_t[::1] index_to_samples, + float32_t[::1] Xf, + intp_t* end_negative, + intp_t* start_positive, + intp_t[::1] sorted_samples, bint* is_samples_sorted) noexcept nogil: """Extract and partition values for a given feature using binary search. @@ -215,14 +215,14 @@ cdef inline void extract_nnz_binary_search(INT32_t[::1] X_indices, O((1 - is_samples_sorted[0]) * n_samples * log(n_samples) + n_samples * log(n_indices)). 
""" - cdef SIZE_t n_samples + cdef intp_t n_samples if not is_samples_sorted[0]: n_samples = end - start memcpy(&sorted_samples[start], &samples[start], - n_samples * sizeof(SIZE_t)) - qsort(&sorted_samples[start], n_samples, sizeof(SIZE_t), - compare_SIZE_t) + n_samples * sizeof(intp_t)) + qsort(&sorted_samples[start], n_samples, sizeof(intp_t), + compare_intp_t) is_samples_sorted[0] = 1 while (indptr_start < indptr_end and @@ -233,11 +233,11 @@ cdef inline void extract_nnz_binary_search(INT32_t[::1] X_indices, sorted_samples[end - 1] < X_indices[indptr_end - 1]): indptr_end -= 1 - cdef SIZE_t p = start - cdef SIZE_t index - cdef SIZE_t k - cdef SIZE_t end_negative_ = start - cdef SIZE_t start_positive_ = end + cdef intp_t p = start + cdef intp_t index + cdef intp_t k + cdef intp_t end_negative_ = start + cdef intp_t start_positive_ = end while (p < end and indptr_start < indptr_end): # Find index of sorted_samples[p] in X_indices @@ -264,8 +264,8 @@ cdef inline void extract_nnz_binary_search(INT32_t[::1] X_indices, start_positive[0] = start_positive_ -cdef inline void sparse_swap(SIZE_t[::1] index_to_samples, SIZE_t[::1] samples, - SIZE_t pos_1, SIZE_t pos_2) noexcept nogil: +cdef inline void sparse_swap(intp_t[::1] index_to_samples, intp_t[::1] samples, + intp_t pos_1, intp_t pos_2) noexcept nogil: """Swap sample pos_1 and pos_2 preserving sparse invariant.""" samples[pos_1], samples[pos_2] = samples[pos_2], samples[pos_1] index_to_samples[samples[pos_1]] = pos_1 diff --git a/sktree/tree/_utils.pxd b/sktree/tree/_utils.pxd index fc93163b2..c44454914 100644 --- a/sktree/tree/_utils.pxd +++ b/sktree/tree/_utils.pxd @@ -4,22 +4,19 @@ cimport numpy as cnp cnp.import_array() -from sktree._lib.sklearn.tree._splitter cimport SplitRecord -from sktree._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from sktree._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from sktree._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from sktree._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from sktree._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer +from .._lib.sklearn.tree._splitter cimport SplitRecord +from .._lib.sklearn.tree._utils cimport UINT32_t +from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, int32_t, intp_t -cdef int rand_weighted_binary(double p0, UINT32_t* random_state) noexcept nogil +cdef int rand_weighted_binary(float64_t p0, UINT32_t* random_state) noexcept nogil cpdef unravel_index( - SIZE_t index, cnp.ndarray[SIZE_t, ndim=1] shape + intp_t index, cnp.ndarray[intp_t, ndim=1] shape ) -cpdef ravel_multi_index(SIZE_t[:] coords, const SIZE_t[:] shape) +cpdef ravel_multi_index(intp_t[:] coords, const intp_t[:] shape) -cdef void unravel_index_cython(SIZE_t index, const SIZE_t[:] shape, SIZE_t[:] coords) noexcept nogil +cdef void unravel_index_cython(intp_t index, const intp_t[:] shape, intp_t[:] coords) noexcept nogil -cdef SIZE_t ravel_multi_index_cython(SIZE_t[:] coords, const SIZE_t[:] shape) noexcept nogil +cdef intp_t ravel_multi_index_cython(intp_t[:] coords, const intp_t[:] shape) noexcept nogil diff --git a/sktree/tree/_utils.pyx b/sktree/tree/_utils.pyx index 1b7b6c293..84dc2de34 100644 --- a/sktree/tree/_utils.pyx +++ b/sktree/tree/_utils.pyx @@ -11,20 +11,20 @@ cimport numpy as cnp cnp.import_array() -from sktree._lib.sklearn.tree._utils cimport rand_uniform +from .._lib.sklearn.tree._utils cimport rand_uniform -cdef inline int rand_weighted_binary(double p0, UINT32_t* random_state) 
noexcept nogil: +cdef inline int rand_weighted_binary(float64_t p0, UINT32_t* random_state) noexcept nogil: """Sample from integers 0 and 1 with different probabilities. Parameters ---------- - p0 : double + p0 : float64_t The probability of sampling 0. random_state : UINT32_t* The random state. """ - cdef double random_value = rand_uniform(0.0, 1.0, random_state) + cdef float64_t random_value = rand_uniform(0.0, 1.0, random_state) if random_value < p0: return 0 @@ -32,8 +32,8 @@ cdef inline int rand_weighted_binary(double p0, UINT32_t* random_state) noexcept return 1 cpdef unravel_index( - SIZE_t index, - cnp.ndarray[SIZE_t, ndim=1] shape + intp_t index, + cnp.ndarray[intp_t, ndim=1] shape ): """Converts a flat index or array of flat indices into a tuple of coordinate arrays. @@ -41,14 +41,14 @@ cpdef unravel_index( Parameters ---------- - index : SIZE_t + index : intp_t A flat index. - shape : numpy.ndarray[SIZE_t, ndim=1] + shape : numpy.ndarray[intp_t, ndim=1] The shape of the array into which the flat indices should be converted. Returns ------- - numpy.ndarray[SIZE_t, ndim=1] + numpy.ndarray[intp_t, ndim=1] A coordinate array having the same shape as the input `shape`. """ index = np.intp(index) @@ -58,21 +58,21 @@ cpdef unravel_index( return coords -cpdef ravel_multi_index(SIZE_t[:] coords, const SIZE_t[:] shape): +cpdef ravel_multi_index(intp_t[:] coords, const intp_t[:] shape): """Converts a tuple of coordinate arrays into a flat index. Purely used for testing purposes. Parameters ---------- - coords : numpy.ndarray[SIZE_t, ndim=1] + coords : numpy.ndarray[intp_t, ndim=1] An array of coordinate arrays to be converted. - shape : numpy.ndarray[SIZE_t, ndim=1] + shape : numpy.ndarray[intp_t, ndim=1] The shape of the array into which the coordinates should be converted. Returns ------- - SIZE_t + intp_t The resulting flat index. Raises @@ -83,25 +83,25 @@ cpdef ravel_multi_index(SIZE_t[:] coords, const SIZE_t[:] shape): return ravel_multi_index_cython(coords, shape) -cdef void unravel_index_cython(SIZE_t index, const SIZE_t[:] shape, SIZE_t[:] coords) noexcept nogil: +cdef void unravel_index_cython(intp_t index, const intp_t[:] shape, intp_t[:] coords) noexcept nogil: """Converts a flat index into a tuple of coordinate arrays. Parameters ---------- - index : SIZE_t + index : intp_t The flat index to be converted. - shape : numpy.ndarray[SIZE_t, ndim=1] + shape : numpy.ndarray[intp_t, ndim=1] The shape of the array into which the flat index should be converted. - coords : numpy.ndarray[SIZE_t, ndim=1] + coords : numpy.ndarray[intp_t, ndim=1] A preinitialized memoryview array of coordinate arrays to be converted. Returns ------- - numpy.ndarray[SIZE_t, ndim=1] + numpy.ndarray[intp_t, ndim=1] An array of coordinate arrays, with each coordinate array having the same shape as the input `shape`. """ - cdef SIZE_t ndim = shape.shape[0] - cdef SIZE_t j, size + cdef intp_t ndim = shape.shape[0] + cdef intp_t j, size for j in range(ndim - 1, -1, -1): size = shape[j] @@ -109,19 +109,19 @@ cdef void unravel_index_cython(SIZE_t index, const SIZE_t[:] shape, SIZE_t[:] co index //= size -cdef SIZE_t ravel_multi_index_cython(SIZE_t[:] coords, const SIZE_t[:] shape) noexcept nogil: +cdef intp_t ravel_multi_index_cython(intp_t[:] coords, const intp_t[:] shape) noexcept nogil: """Converts a tuple of coordinate arrays into a flat index. Parameters ---------- - coords : numpy.ndarray[SIZE_t, ndim=1] + coords : numpy.ndarray[intp_t, ndim=1] An array of coordinate arrays to be converted. 
- shape : numpy.ndarray[SIZE_t, ndim=1] + shape : numpy.ndarray[intp_t, ndim=1] The shape of the array into which the coordinates should be converted. Returns ------- - SIZE_t + intp_t The resulting flat index. Raises @@ -129,8 +129,8 @@ cdef SIZE_t ravel_multi_index_cython(SIZE_t[:] coords, const SIZE_t[:] shape) no ValueError If the input `coords` have invalid indices. """ - cdef SIZE_t i, ndim - cdef SIZE_t flat_index, index + cdef intp_t i, ndim + cdef intp_t flat_index, index ndim = len(shape) diff --git a/sktree/tree/manifold/_morf_splitter.pxd b/sktree/tree/manifold/_morf_splitter.pxd index a05bb50e8..2bdb7ab56 100644 --- a/sktree/tree/manifold/_morf_splitter.pxd +++ b/sktree/tree/manifold/_morf_splitter.pxd @@ -14,11 +14,8 @@ cimport numpy as cnp from libcpp.vector cimport vector from ..._lib.sklearn.tree._splitter cimport SplitRecord -from ..._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from ..._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from ..._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from ..._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from ..._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer +from ..._lib.sklearn.tree._utils cimport UINT32_t +from ..._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t from .._oblique_splitter cimport BestObliqueSplitter, ObliqueSplitRecord # https://github.com/cython/cython/blob/master/Cython/Includes/libcpp/algorithm.pxd @@ -44,49 +41,49 @@ cdef class PatchSplitter(BestObliqueSplitter): # `data_width` are used to determine the vectorized indices corresponding to # (x,y) coordinates in the original un-vectorized data. - cdef public SIZE_t max_patch_height # Maximum height of the patch to sample - cdef public SIZE_t max_patch_width # Maximum width of the patch to sample - cdef public SIZE_t min_patch_height # Minimum height of the patch to sample - cdef public SIZE_t min_patch_width # Minimum width of the patch to sample - cdef public SIZE_t data_height # Height of the input data - cdef public SIZE_t data_width # Width of the input data + cdef public intp_t max_patch_height # Maximum height of the patch to sample + cdef public intp_t max_patch_width # Maximum width of the patch to sample + cdef public intp_t min_patch_height # Minimum height of the patch to sample + cdef public intp_t min_patch_width # Minimum width of the patch to sample + cdef public intp_t data_height # Height of the input data + cdef public intp_t data_width # Width of the input data - cdef public SIZE_t ndim # The number of dimensions of the input data + cdef public intp_t ndim # The number of dimensions of the input data - cdef const SIZE_t[:] data_dims # The dimensions of the input data - cdef const SIZE_t[:] min_patch_dims # The minimum size of the patch to sample in each dimension - cdef const SIZE_t[:] max_patch_dims # The maximum size of the patch to sample in each dimension + cdef const intp_t[:] data_dims # The dimensions of the input data + cdef const intp_t[:] min_patch_dims # The minimum size of the patch to sample in each dimension + cdef const intp_t[:] max_patch_dims # The maximum size of the patch to sample in each dimension cdef const cnp.uint8_t[:] dim_contiguous # A boolean array indicating whether each dimension is contiguous # TODO: check if this works and is necessary for discontiguous data - # cdef SIZE_t[:] stride_offsets # The stride offsets for each dimension + # cdef intp_t[:] stride_offsets # The stride offsets for each dimension cdef 
bint _discontiguous cdef bytes boundary # how to sample the patch with boundary in mind - cdef const DTYPE_t[:, :] feature_weight # Whether or not to normalize each column of X when adding in a patch + cdef const float32_t[:, :] feature_weight # Whether or not to normalize each column of X when adding in a patch - cdef SIZE_t[::1] _index_data_buffer - cdef SIZE_t[::1] _index_patch_buffer - cdef SIZE_t[:] patch_dims_buff # A buffer to store the dimensions of the sampled patch - cdef SIZE_t[:] unraveled_patch_point # A buffer to store the unraveled patch point + cdef intp_t[::1] _index_data_buffer + cdef intp_t[::1] _index_patch_buffer + cdef intp_t[:] patch_dims_buff # A buffer to store the dimensions of the sampled patch + cdef intp_t[:] unraveled_patch_point # A buffer to store the unraveled patch point # All oblique splitters (i.e. non-axis aligned splitters) require a # function to sample a projection matrix that is applied to the feature matrix # to quickly obtain the sampled projections for candidate splits. - cdef (SIZE_t, SIZE_t) sample_top_left_seed( + cdef (intp_t, intp_t) sample_top_left_seed( self ) noexcept nogil cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil # cdef class UserKernelSplitter(PatchSplitter): # """A class to hold user-specified kernels.""" -# cdef vector[DTYPE_t[:, ::1]] kernel_dictionary # A list of C-contiguous 2D kernels +# cdef vector[float32_t[:, ::1]] kernel_dictionary # A list of C-contiguous 2D kernels cdef class GaussianKernelSplitter(PatchSplitter): @@ -99,6 +96,6 @@ cdef class GaussianKernelSplitter(PatchSplitter): cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil diff --git a/sktree/tree/manifold/_morf_splitter.pyx b/sktree/tree/manifold/_morf_splitter.pyx index 95c37a2da..79be42a47 100644 --- a/sktree/tree/manifold/_morf_splitter.pyx +++ b/sktree/tree/manifold/_morf_splitter.pyx @@ -30,22 +30,22 @@ cdef class PatchSplitter(BestObliqueSplitter): def __setstate__(self, d): pass - cdef int init( + cdef intp_t init( self, object X, - const DOUBLE_t[:, ::1] y, - const DOUBLE_t[:] sample_weight, + const float64_t[:, ::1] y, + const float64_t[:] sample_weight, const unsigned char[::1] missing_values_in_feature_mask, ) except -1: BestObliqueSplitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) return 0 - cdef int node_reset( + cdef intp_t node_reset( self, - SIZE_t start, - SIZE_t end, - double* weighted_n_node_samples + intp_t start, + intp_t end, + float64_t* weighted_n_node_samples ) except -1 nogil: """Reset splitter on node samples[start:end]. 
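# A rough Python sketch of what ``sample_top_left_seed`` above does conceptually:
# draw a patch length per dimension between the declared min/max patch dims, pick a
# top-left coordinate so the patch fits inside ``data_dims``, and return the raveled
# top-left index plus the patch volume. This ignores the ``boundary`` option and the
# discontiguous-dimension handling, so treat it as an approximation of the Cython
# logic rather than a transcription; the function name and signature are assumed.
import numpy as np

def sample_top_left_seed(min_patch_dims, max_patch_dims, data_dims, rng):
    # one random patch length per dimension, inclusive of the max
    patch_dims = [rng.integers(lo, hi + 1) for lo, hi in zip(min_patch_dims, max_patch_dims)]
    # top-left coordinate chosen so the whole patch stays inside the data
    top_left = [
        rng.integers(0, data_dim - patch_dim + 1)
        for data_dim, patch_dim in zip(data_dims, patch_dims)
    ]
    top_left_seed = int(np.ravel_multi_index(top_left, data_dims))
    patch_size = int(np.prod(patch_dims))
    return top_left_seed, patch_size, patch_dims

# rng = np.random.default_rng(0)
# sample_top_left_seed((1, 1), (3, 3), (28, 28), rng)  # e.g. patches over a 2D image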
@@ -54,11 +54,11 @@ cdef class PatchSplitter(BestObliqueSplitter): Parameters ---------- - start : SIZE_t + start : intp_t The index of the first sample to consider - end : SIZE_t + end : intp_t The index of the last sample to consider - weighted_n_node_samples : ndarray, dtype=double pointer + weighted_n_node_samples : ndarray, dtype=float64_t pointer The total weight of those samples """ @@ -81,8 +81,8 @@ cdef class PatchSplitter(BestObliqueSplitter): cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil: """ Sample the projection vector. @@ -91,18 +91,18 @@ cdef class PatchSplitter(BestObliqueSplitter): """ pass - cdef (SIZE_t, SIZE_t) sample_top_left_seed(self) noexcept nogil: + cdef (intp_t, intp_t) sample_top_left_seed(self) noexcept nogil: pass cdef class BaseDensePatchSplitter(PatchSplitter): - cdef int init( + cdef intp_t init( self, object X, - const DOUBLE_t[:, ::1] y, - const DOUBLE_t[:] sample_weight, + const float64_t[:, ::1] y, + const float64_t[:] sample_weight, const unsigned char[::1] missing_values_in_feature_mask, - # const INT32_t[:] n_categories + # const int32_t[:] n_categories ) except -1: """Initialize the splitter @@ -120,18 +120,18 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): def __cinit__( self, Criterion criterion, - SIZE_t max_features, - SIZE_t min_samples_leaf, - double min_weight_leaf, + intp_t max_features, + intp_t min_samples_leaf, + float64_t min_weight_leaf, object random_state, const cnp.int8_t[:] monotonic_cst, - double feature_combinations, - const SIZE_t[:] min_patch_dims, - const SIZE_t[:] max_patch_dims, + float64_t feature_combinations, + const intp_t[:] min_patch_dims, + const intp_t[:] max_patch_dims, const cnp.uint8_t[:] dim_contiguous, - const SIZE_t[:] data_dims, + const intp_t[:] data_dims, bytes boundary, - const DTYPE_t[:, :] feature_weight, + const float32_t[:, :] feature_weight, *argv ): self.criterion = criterion @@ -147,8 +147,8 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): self.monotonic_cst = monotonic_cst # Sparse max_features x n_features projection matrix - self.proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) - self.proj_mat_indices = vector[vector[SIZE_t]](self.max_features) + self.proj_mat_weights = vector[vector[float32_t]](self.max_features) + self.proj_mat_indices = vector[vector[intp_t]](self.max_features) # initialize state to allow generalization to higher-dimensional tensors self.ndim = data_dims.shape[0] @@ -196,31 +196,31 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): self.feature_weight.base if self.feature_weight is not None else None, ), self.__getstate__()) - cdef (SIZE_t, SIZE_t) sample_top_left_seed(self) noexcept nogil: + cdef (intp_t, intp_t) sample_top_left_seed(self) noexcept nogil: """Sample the top-left seed for the n-dim patch. Returns ------- - top_left_seed : SIZE_t + top_left_seed : intp_t The top-left seed vectorized (i.e. raveled) for the n-dim patch. - patch_size : SIZE_t + patch_size : intp_t The total size of the n-dim patch (i.e. the volume). 
""" # now get the top-left seed that is used to then determine the top-left # position in patch # compute top-left seed for the multi-dimensional patch - cdef SIZE_t top_left_patch_seed - cdef SIZE_t patch_size = 1 + cdef intp_t top_left_patch_seed + cdef intp_t patch_size = 1 cdef UINT32_t* random_state = &self.rand_r_state # define parameters for the random patch - cdef SIZE_t patch_dim - cdef SIZE_t delta_patch_dim + cdef intp_t patch_dim + cdef intp_t delta_patch_dim - cdef SIZE_t dim + cdef intp_t dim - cdef SIZE_t idx + cdef intp_t idx for idx in range(self.ndim): # compute random patch width and height @@ -272,23 +272,23 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil: """Sample projection matrix using a contiguous patch. Randomly sample patches with weight of 1. """ - cdef SIZE_t max_features = self.max_features - cdef int proj_i + cdef intp_t max_features = self.max_features + cdef intp_t proj_i # define parameters for vectorized points in the original data shape # and top-left seed - cdef SIZE_t top_left_patch_seed + cdef intp_t top_left_patch_seed # size of the sampled patch, which is just the size of the n-dim patch # (\prod_i self.patch_dims_buff[i]) - cdef SIZE_t patch_size + cdef intp_t patch_size for proj_i in range(0, max_features): # now get the top-left seed that is used to then determine the top-left @@ -308,34 +308,34 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): cdef void sample_proj_vec( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices, - SIZE_t proj_i, - SIZE_t patch_size, - SIZE_t top_left_patch_seed, - const SIZE_t[:] patch_dims, + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices, + intp_t proj_i, + intp_t patch_size, + intp_t top_left_patch_seed, + const intp_t[:] patch_dims, ) noexcept nogil: cdef UINT32_t* random_state = &self.rand_r_state # iterates over the size of the patch - cdef SIZE_t patch_idx + cdef intp_t patch_idx # stores how many patches we have iterated so far - cdef int vectorized_patch_offset - cdef SIZE_t vectorized_point_offset - cdef SIZE_t vectorized_point + cdef intp_t vectorized_patch_offset + cdef intp_t vectorized_point_offset + cdef intp_t vectorized_point - cdef SIZE_t dim_idx + cdef intp_t dim_idx # weights are default to 1 - cdef DTYPE_t weight = 1. + cdef float32_t weight = 1. # XXX: still unsure if it works yet # XXX: THIS ONLY WORKS FOR THE FIRST DIMENSION THAT IS DISCONTIGUOUS. 
- cdef SIZE_t other_dims_offset - cdef SIZE_t row_index + cdef intp_t other_dims_offset + cdef intp_t row_index - cdef SIZE_t i - cdef int num_rows = self.data_dims[0] + cdef intp_t i + cdef intp_t num_rows = self.data_dims[0] if self._discontiguous: # fill with values 0, 1, ..., dimension - 1 for i in range(0, self.data_dims[0]): @@ -407,22 +407,22 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): cdef void compute_features_over_samples( self, - SIZE_t start, - SIZE_t end, - const SIZE_t[:] samples, - DTYPE_t[:] feature_values, - vector[DTYPE_t]* proj_vec_weights, # weights of the vector (max_features,) - vector[SIZE_t]* proj_vec_indices # indices of the features (max_features,) + intp_t start, + intp_t end, + const intp_t[:] samples, + float32_t[:] feature_values, + vector[float32_t]* proj_vec_weights, # weights of the vector (max_features,) + vector[intp_t]* proj_vec_indices # indices of the features (max_features,) ) noexcept nogil: """Compute the feature values for the samples[start:end] range. Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ - cdef SIZE_t idx, jdx + cdef intp_t idx, jdx # initialize feature weight to normalize across patch - cdef DTYPE_t patch_weight + cdef float32_t patch_weight # Compute linear combination of features and then # sort samples according to the feature values. @@ -454,14 +454,14 @@ cdef class BestPatchSplitterTester(BestPatchSplitter): cpdef sample_projection_vector( self, - SIZE_t proj_i, - SIZE_t patch_size, - SIZE_t top_left_patch_seed, - SIZE_t[:] patch_dims, + intp_t proj_i, + intp_t patch_size, + intp_t top_left_patch_seed, + intp_t[:] patch_dims, ): - cdef vector[vector[DTYPE_t]] proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) - cdef vector[vector[SIZE_t]] proj_mat_indices = vector[vector[SIZE_t]](self.max_features) - cdef SIZE_t i, j + cdef vector[vector[float32_t]] proj_mat_weights = vector[vector[float32_t]](self.max_features) + cdef vector[vector[intp_t]] proj_mat_indices = vector[vector[intp_t]](self.max_features) + cdef intp_t i, j # sample projection matrix in C/C++ self.sample_proj_vec( @@ -489,9 +489,9 @@ cdef class BestPatchSplitterTester(BestPatchSplitter): Randomly sample patches with weight of 1. 
""" - cdef vector[vector[DTYPE_t]] proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) - cdef vector[vector[SIZE_t]] proj_mat_indices = vector[vector[SIZE_t]](self.max_features) - cdef SIZE_t i, j + cdef vector[vector[float32_t]] proj_mat_weights = vector[vector[float32_t]](self.max_features) + cdef vector[vector[intp_t]] proj_mat_indices = vector[vector[intp_t]](self.max_features) + cdef intp_t i, j # sample projection matrix in C/C++ self.sample_proj_mat(proj_mat_weights, proj_mat_indices) diff --git a/sktree/tree/unsupervised/_unsup_criterion.pxd b/sktree/tree/unsupervised/_unsup_criterion.pxd index bfbd7428a..8ef633875 100644 --- a/sktree/tree/unsupervised/_unsup_criterion.pxd +++ b/sktree/tree/unsupervised/_unsup_criterion.pxd @@ -3,11 +3,7 @@ # cython: language_level=3 from ..._lib.sklearn.tree._criterion cimport BaseCriterion -from ..._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from ..._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from ..._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from ..._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from ..._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer +from ..._lib.sklearn.utils._typedefs cimport float32_t, float64_t, int32_t, intp_t # Note: This class is an exact copy of scikit-learn's Criterion # class, with the exception of the type of the internal structure. @@ -31,31 +27,31 @@ cdef class UnsupervisedCriterion(BaseCriterion): # impurity of a split on that node. It also computes the output statistics. # Internal structures - cdef const DTYPE_t[:] feature_values # 1D memview for the feature vector to compute criterion on + cdef const float32_t[:] feature_values # 1D memview for the feature vector to compute criterion on # Keep running total of Xf[samples[start:end]] and the corresponding sum in # the left and right node. For example, this can then efficiently compute the # mean of the node, and left/right child by subtracting relevant Xf elements # and then dividing by the total number of samples in the node and left/right child. - cdef double sum_total # The sum of the weighted count of each feature. - cdef double sum_left # Same as above, but for the left side of the split - cdef double sum_right # Same as above, but for the right side of the split + cdef float64_t sum_total # The sum of the weighted count of each feature. + cdef float64_t sum_left # Same as above, but for the left side of the split + cdef float64_t sum_right # Same as above, but for the right side of the split - cdef double sumsq_total # The sum of the weighted count of each feature. - cdef double sumsq_left # Same as above, but for the left side of the split - cdef double sumsq_right # Same as above, but for the right side of the split + cdef float64_t sumsq_total # The sum of the weighted count of each feature. + cdef float64_t sumsq_left # Same as above, but for the left side of the split + cdef float64_t sumsq_right # Same as above, but for the right side of the split # Methods # ------- # The 'init' method is copied here with the almost the exact same signature # as that of supervised learning criterion in scikit-learn to ensure that # Unsupervised criterion can be used with splitter and tree methods. 
- cdef int init( + cdef intp_t init( self, - const DTYPE_t[:] feature_values, - const DOUBLE_t[:] sample_weight, - double weighted_n_samples, - const SIZE_t[:] samples, + const float32_t[:] feature_values, + const float64_t[:] sample_weight, + float64_t weighted_n_samples, + const intp_t[:] samples, ) except -1 nogil cdef void init_feature_vec( @@ -64,6 +60,6 @@ cdef class UnsupervisedCriterion(BaseCriterion): cdef void set_sample_pointers( self, - SIZE_t start, - SIZE_t end + intp_t start, + intp_t end ) noexcept nogil diff --git a/sktree/tree/unsupervised/_unsup_criterion.pyx b/sktree/tree/unsupervised/_unsup_criterion.pyx index 518fc2753..58f21f348 100644 --- a/sktree/tree/unsupervised/_unsup_criterion.pyx +++ b/sktree/tree/unsupervised/_unsup_criterion.pyx @@ -11,7 +11,7 @@ from libc.math cimport log cnp.import_array() -cdef DTYPE_t PI = np.pi +cdef float32_t PI = np.pi cdef class UnsupervisedCriterion(BaseCriterion): @@ -64,19 +64,19 @@ cdef class UnsupervisedCriterion(BaseCriterion): Parameters ---------- - Xf : array-like, dtype=DTYPE_t + Xf : array-like, dtype=float32_t The read-only memoryview 1D feature vector with (n_samples,) shape. """ # also compute the sum total self.sum_total = 0.0 self.sumsq_total = 0.0 self.weighted_n_node_samples = 0.0 - cdef SIZE_t s_idx - cdef SIZE_t p_idx + cdef intp_t s_idx + cdef intp_t p_idx # XXX: this can be further optimized by computing a cumulative sum hash map of the sum_total and sumsq_total # and then update will never have to iterate through even - cdef DOUBLE_t w = 1.0 + cdef float64_t w = 1.0 for p_idx in range(self.start, self.end): s_idx = self.sample_indices[p_idx] @@ -93,12 +93,12 @@ cdef class UnsupervisedCriterion(BaseCriterion): # Reset to pos=start self.reset() - cdef int init( + cdef intp_t init( self, - const DTYPE_t[:] feature_values, - const DOUBLE_t[:] sample_weight, - double weighted_n_samples, - const SIZE_t[:] sample_indices, + const float32_t[:] feature_values, + const float64_t[:] sample_weight, + float64_t weighted_n_samples, + const intp_t[:] sample_indices, ) except -1 nogil: """Initialize the unsuperivsed criterion. @@ -107,13 +107,13 @@ cdef class UnsupervisedCriterion(BaseCriterion): Parameters ---------- - feature_values : array-like, dtype=DTYPE_t + feature_values : array-like, dtype=float32_t The memoryview 1D feature vector with (n_samples,) shape. - sample_weight : array-like, dtype=DOUBLE_t + sample_weight : array-like, dtype=float64_t The weight of each sample (i.e. row of X). - weighted_n_samples : double + weighted_n_samples : float64_t The total weight of all sample_indices. - sample_indices : array-like, dtype=SIZE_t + sample_indices : array-like, dtype=intp_t A mask on the sample_indices, showing which ones we want to use """ self.feature_values = feature_values @@ -123,7 +123,7 @@ cdef class UnsupervisedCriterion(BaseCriterion): return 0 - cdef int reset(self) except -1 nogil: + cdef intp_t reset(self) except -1 nogil: """Reset the criterion at pos=start. Returns -1 in case of failure to allocate memory (and raise MemoryError) @@ -140,7 +140,7 @@ cdef class UnsupervisedCriterion(BaseCriterion): self.sumsq_right = self.sumsq_total return 0 - cdef int reverse_reset(self) except -1 nogil: + cdef intp_t reverse_reset(self) except -1 nogil: """Reset the criterion at pos=end. 
Returns -1 in case of failure to allocate memory (and raise MemoryError) @@ -157,9 +157,9 @@ cdef class UnsupervisedCriterion(BaseCriterion): self.sumsq_left = self.sumsq_total return 0 - cdef int update( + cdef intp_t update( self, - SIZE_t new_pos + intp_t new_pos ) except -1 nogil: """Updated statistics by moving sample_indices[pos:new_pos] to the left child. @@ -168,19 +168,19 @@ cdef class UnsupervisedCriterion(BaseCriterion): Parameters ---------- - new_pos : SIZE_t + new_pos : intp_t The new ending position for which to move sample_indices from the right child to the left child. """ - cdef SIZE_t pos = self.pos - cdef SIZE_t end = self.end + cdef intp_t pos = self.pos + cdef intp_t end = self.end - cdef const SIZE_t[:] sample_indices = self.sample_indices - cdef const DOUBLE_t[:] sample_weight = self.sample_weight + cdef const intp_t[:] sample_indices = self.sample_indices + cdef const float64_t[:] sample_weight = self.sample_weight - cdef SIZE_t i - cdef SIZE_t p - cdef DOUBLE_t w = 1.0 + cdef intp_t i + cdef intp_t p + cdef float64_t w = 1.0 # Update statistics up to new_pos # @@ -225,13 +225,13 @@ cdef class UnsupervisedCriterion(BaseCriterion): cdef void node_value( self, - double* dest + float64_t* dest ) noexcept nogil: """Set the node value with sum_total and save it into dest. Parameters ---------- - dest : double pointer + dest : float64_t pointer The memory address which we will save the node value into. """ # set values at the address pointer is pointing to with the total value @@ -239,8 +239,8 @@ cdef class UnsupervisedCriterion(BaseCriterion): cdef void set_sample_pointers( self, - SIZE_t start, - SIZE_t end + intp_t start, + intp_t end ) noexcept nogil: """Set sample pointers in the criterion. @@ -248,9 +248,9 @@ cdef class UnsupervisedCriterion(BaseCriterion): Parameters ---------- - start : SIZE_t + start : intp_t The start sample pointer. - end : SIZE_t + end : intp_t The end sample pointer. """ self.n_node_samples = end - start @@ -307,7 +307,7 @@ cdef class TwoMeans(UnsupervisedCriterion): pair minimizes the splitting criteria described in the following section """ - cdef double node_impurity( + cdef float64_t node_impurity( self ) noexcept nogil: """Evaluate the impurity of the current node. @@ -316,7 +316,7 @@ cdef class TwoMeans(UnsupervisedCriterion): i.e. the variance of Xf[sample_indices[start:end]]. The smaller the impurity the better. """ - cdef double impurity + cdef float64_t impurity # then compute the impurity as the variance impurity = self.fast_variance(self.weighted_n_node_samples, self.sumsq_total, self.sum_total) @@ -324,8 +324,8 @@ cdef class TwoMeans(UnsupervisedCriterion): cdef void children_impurity( self, - double* impurity_left, - double* impurity_right + float64_t* impurity_left, + float64_t* impurity_right ) noexcept nogil: """Evaluate the impurity in children nodes. 
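# ``fast_variance`` above is the running-sums identity Var(x) = E[x^2] - E[x]^2,
# evaluated from the cached weighted sums so each candidate split is scored in O(1).
# A small sanity check against NumPy for the unweighted case; the function here is
# copied for illustration only, not imported from the package:
import numpy as np

def fast_variance(weighted_n, sumsq_total, sum_total):
    return (1.0 / weighted_n) * (sumsq_total - (1.0 / weighted_n) * sum_total * sum_total)

x = np.array([1.0, 2.0, 4.0, 7.0])
assert np.isclose(fast_variance(len(x), np.sum(x ** 2), np.sum(x)), np.var(x))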
@@ -340,9 +340,9 @@ cdef class TwoMeans(UnsupervisedCriterion): Parameters ---------- - impurity_left : double pointer + impurity_left : float64_t pointer The memory address to save the impurity of the left node - impurity_right : double pointer + impurity_right : float64_t pointer The memory address to save the impurity of the right node """ # set values at the address pointer is pointing to with the variance @@ -350,11 +350,11 @@ cdef class TwoMeans(UnsupervisedCriterion): impurity_left[0] = self.fast_variance(self.weighted_n_left, self.sumsq_left, self.sum_left) impurity_right[0] = self.fast_variance(self.weighted_n_right, self.sumsq_right, self.sum_right) - cdef inline double fast_variance( + cdef inline float64_t fast_variance( self, - double weighted_n_node_samples, - double sumsq_total, - double sum_total + float64_t weighted_n_node_samples, + float64_t sumsq_total, + float64_t sum_total ) noexcept nogil: return (1. / weighted_n_node_samples) * \ ((sumsq_total) - (1. / weighted_n_node_samples) * (sum_total * sum_total)) @@ -391,18 +391,18 @@ cdef class FastBIC(TwoMeans): Reference: https://arxiv.org/abs/1907.02844 """ - cdef inline double bic_cluster( + cdef inline float64_t bic_cluster( self, - SIZE_t n_samples, - double variance + intp_t n_samples, + float64_t variance ) noexcept nogil: """Help compute the BIC from assigning to a specific cluster. Parameters ---------- - n_samples : SIZE_t + n_samples : intp_t The number of samples assigned cluster. - variance : double + variance : float64_t The plug-in variance for assigning to specific cluster. Notes @@ -421,13 +421,13 @@ cdef class FastBIC(TwoMeans): """ # chances of choosing the cluster based on how many samples are hard-assigned to cluster # i.e. the prior - # cast to double, so we do not round to integers - cdef double w_cluster = (n_samples + 0.0) / self.n_node_samples + # cast to float64_t, so we do not round to integers + cdef float64_t w_cluster = (n_samples + 0.0) / self.n_node_samples # add to prevent taking log of 0 when there is a degenerate cluster (i.e. single sample, or no variance) return -2. * (n_samples * log(w_cluster) + 0.5 * n_samples * log(2. * PI * variance + 1.e-7)) - cdef double node_impurity( + cdef float64_t node_impurity( self ) noexcept nogil: """Evaluate the impurity of the current node. @@ -437,8 +437,8 @@ cdef class FastBIC(TwoMeans): Namely, this is the maximum likelihood of Xf[sample_indices[start:end]]. The smaller the impurity the better. """ - cdef double variance - cdef double impurity + cdef float64_t variance + cdef float64_t impurity # then compute the variance of the cluster variance = self.fast_variance(self.weighted_n_node_samples, self.sumsq_total, self.sum_total) @@ -451,8 +451,8 @@ cdef class FastBIC(TwoMeans): cdef void children_impurity( self, - double* impurity_left, - double* impurity_right + float64_t* impurity_left, + float64_t* impurity_right ) noexcept nogil: """Evaluate the impurity in children nodes. 
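# ``bic_cluster`` above scores the hard assignment of ``n_samples`` of the node's
# points to a Gaussian cluster with the given plug-in variance: the first term is the
# log mixing proportion and the second the Gaussian normalization, with a 1e-7 jitter
# guarding against log(0) on degenerate clusters. A direct Python transcription, with
# ``n_node_samples`` passed explicitly instead of read from the criterion state:
from math import log, pi

def bic_cluster(n_samples, variance, n_node_samples):
    w_cluster = n_samples / n_node_samples  # prior: fraction of node samples in this cluster
    return -2.0 * (n_samples * log(w_cluster) + 0.5 * n_samples * log(2.0 * pi * variance + 1e-7))

# e.g. a 60-sample cluster with variance 2.5 inside a 100-sample node:
# bic_cluster(60, 2.5, n_node_samples=100)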
@@ -461,20 +461,20 @@ cdef class FastBIC(TwoMeans): Parameters ---------- - impurity_left : double pointer + impurity_left : float64_t pointer The memory address to save the impurity of the left node - impurity_right : double pointer + impurity_right : float64_t pointer The memory address to save the impurity of the right node """ - cdef SIZE_t pos = self.pos - cdef SIZE_t start = self.start - cdef SIZE_t end = self.end - cdef SIZE_t n_samples_left, n_samples_right - - cdef double variance_left, variance_right, variance_comb - cdef double BIC_diff_var_left, BIC_diff_var_right - cdef double BIC_same_var_left, BIC_same_var_right - cdef double BIC_same_var, BIC_diff_var + cdef intp_t pos = self.pos + cdef intp_t start = self.start + cdef intp_t end = self.end + cdef intp_t n_samples_left, n_samples_right + + cdef float64_t variance_left, variance_right, variance_comb + cdef float64_t BIC_diff_var_left, BIC_diff_var_right + cdef float64_t BIC_same_var_left, BIC_same_var_right + cdef float64_t BIC_same_var, BIC_diff_var # number of samples of left and right n_samples_left = pos - start diff --git a/sktree/tree/unsupervised/_unsup_oblique_splitter.pxd b/sktree/tree/unsupervised/_unsup_oblique_splitter.pxd index 07a7049bb..5b7b4a2d4 100644 --- a/sktree/tree/unsupervised/_unsup_oblique_splitter.pxd +++ b/sktree/tree/unsupervised/_unsup_oblique_splitter.pxd @@ -4,27 +4,24 @@ cimport numpy as cnp from libcpp.vector cimport vector from ..._lib.sklearn.tree._splitter cimport SplitRecord -from ..._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from ..._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from ..._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from ..._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from ..._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer +from ..._lib.sklearn.tree._utils cimport UINT32_t +from ..._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t from ._unsup_splitter cimport UnsupervisedSplitter cdef struct ObliqueSplitRecord: # Data to track sample split - SIZE_t feature # Which feature to split on. - SIZE_t pos # Split samples array at the given position, + intp_t feature # Which feature to split on. + intp_t pos # Split samples array at the given position, # # i.e. count of samples below threshold for feature. # # pos is >= end if the node is a leaf. - double threshold # Threshold to split at. - double improvement # Impurity improvement given parent node. - double impurity_left # Impurity of the left split. - double impurity_right # Impurity of the right split. + float64_t threshold # Threshold to split at. + float64_t improvement # Impurity improvement given parent node. + float64_t impurity_left # Impurity of the left split. + float64_t impurity_right # Impurity of the right split. 
- vector[DTYPE_t]* proj_vec_weights # weights of the vector (max_features,) - vector[SIZE_t]* proj_vec_indices # indices of the features (max_features,) + vector[float32_t]* proj_vec_weights # weights of the vector (max_features,) + vector[intp_t]* proj_vec_indices # indices of the features (max_features,) cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): @@ -38,44 +35,44 @@ cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): """ # Oblique Splitting extra parameters - cdef public double feature_combinations # Number of features to combine - cdef SIZE_t n_non_zeros # Number of non-zero features - cdef vector[vector[DTYPE_t]] proj_mat_weights # nonzero weights of sparse proj_mat matrix - cdef vector[vector[SIZE_t]] proj_mat_indices # nonzero indices of sparse proj_mat matrix - cdef SIZE_t[::1] indices_to_sample # an array of indices to sample of size mtry X n_features + cdef public float64_t feature_combinations # Number of features to combine + cdef intp_t n_non_zeros # Number of non-zero features + cdef vector[vector[float32_t]] proj_mat_weights # nonzero weights of sparse proj_mat matrix + cdef vector[vector[intp_t]] proj_mat_indices # nonzero indices of sparse proj_mat matrix + cdef intp_t[::1] indices_to_sample # an array of indices to sample of size mtry X n_features # All oblique splitters (i.e. non-axis aligned splitters) require a # function to sample a projection matrix that is applied to the feature matrix # to quickly obtain the sampled projections for candidate splits. cdef void sample_proj_mat(self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices) noexcept nogil + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices) noexcept nogil # Redefined here since the new logic requires calling sample_proj_mat - cdef int node_reset(self, SIZE_t start, SIZE_t end, - double* weighted_n_node_samples) except -1 nogil + cdef intp_t node_reset(self, intp_t start, intp_t end, + float64_t* weighted_n_node_samples) except -1 nogil - cdef int node_split( + cdef intp_t node_split( self, - double impurity, # Impurity of the node + float64_t impurity, # Impurity of the node SplitRecord* split, - SIZE_t* n_constant_features, - double lower_bound, - double upper_bound + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound ) except -1 nogil - cdef int init( + cdef intp_t init( self, - const DTYPE_t[:, :] X, - const DOUBLE_t[:] sample_weight + const float32_t[:, :] X, + const float64_t[:] sample_weight ) except -1 - cdef int pointer_size(self) noexcept nogil + cdef intp_t pointer_size(self) noexcept nogil cdef void compute_features_over_samples( self, - SIZE_t start, - SIZE_t end, - const SIZE_t[:] samples, - DTYPE_t[:] feature_values, - vector[DTYPE_t]* proj_vec_weights, # weights of the vector (max_features,) - vector[SIZE_t]* proj_vec_indices # indices of the features (max_features,) + intp_t start, + intp_t end, + const intp_t[:] samples, + float32_t[:] feature_values, + vector[float32_t]* proj_vec_weights, # weights of the vector (max_features,) + vector[intp_t]* proj_vec_indices # indices of the features (max_features,) ) noexcept nogil diff --git a/sktree/tree/unsupervised/_unsup_oblique_splitter.pyx b/sktree/tree/unsupervised/_unsup_oblique_splitter.pyx index e65612095..7cd51d3e6 100644 --- a/sktree/tree/unsupervised/_unsup_oblique_splitter.pyx +++ b/sktree/tree/unsupervised/_unsup_oblique_splitter.pyx @@ -18,17 +18,17 @@ from .._sklearn_splitter cimport sort from 
._unsup_criterion cimport UnsupervisedCriterion -cdef double INFINITY = np.inf +cdef float64_t INFINITY = np.inf # Mitigate precision differences between 32 bit and 64 bit -cdef DTYPE_t FEATURE_THRESHOLD = 1e-7 +cdef float32_t FEATURE_THRESHOLD = 1e-7 # Constant to switch between algorithm non zero value extract algorithm # in SparseSplitter -cdef DTYPE_t EXTRACT_NNZ_SWITCH = 0.1 +cdef float32_t EXTRACT_NNZ_SWITCH = 0.1 -cdef inline void _init_split(ObliqueSplitRecord* self, SIZE_t start_pos) noexcept nogil: +cdef inline void _init_split(ObliqueSplitRecord* self, intp_t start_pos) noexcept nogil: self.impurity_left = INFINITY self.impurity_right = INFINITY self.pos = start_pos @@ -47,11 +47,11 @@ cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): def __cinit__( self, UnsupervisedCriterion criterion, - SIZE_t max_features, - SIZE_t min_samples_leaf, - double min_weight_leaf, + intp_t max_features, + intp_t min_samples_leaf, + float64_t min_weight_leaf, object random_state, - double feature_combinations, + float64_t feature_combinations, *argv ): """ @@ -60,20 +60,20 @@ cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): criterion : Criterion The criterion to measure the quality of a split. - max_features : SIZE_t + max_features : intp_t The maximal number of randomly selected features which can be considered for a split. - min_samples_leaf : SIZE_t + min_samples_leaf : intp_t The minimal number of samples each leaf can have, where splits which would result in having less samples in a leaf are not considered. - min_weight_leaf : double + min_weight_leaf : float64_t The minimal weight each leaf can have, where the weight is the sum of the weights of each sample in it. - feature_combinations : double + feature_combinations : float64_t The average number of features to combine in an oblique split. Each feature is independently included with probability ``feature_combination`` / ``n_features``. @@ -96,11 +96,11 @@ cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): self.feature_combinations = feature_combinations # Sparse max_features x n_features projection matrix - self.proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) - self.proj_mat_indices = vector[vector[SIZE_t]](self.max_features) + self.proj_mat_weights = vector[vector[float32_t]](self.max_features) + self.proj_mat_indices = vector[vector[intp_t]](self.max_features) # or max w/ 1... - self.n_non_zeros = max(int(self.max_features * self.feature_combinations), 1) + self.n_non_zeros = max((self.max_features * self.feature_combinations), 1) def __getstate__(self): return {} @@ -116,10 +116,10 @@ cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): self.random_state, self.feature_combinations), self.__getstate__()) - cdef int init( + cdef intp_t init( self, - const DTYPE_t[:, :] X, - const DOUBLE_t[:] sample_weight + const float32_t[:, :] X, + const float64_t[:] sample_weight ) except -1: UnsupervisedSplitter.init(self, X, sample_weight) @@ -128,8 +128,8 @@ cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): dtype=np.intp) return 0 - cdef int node_reset(self, SIZE_t start, SIZE_t end, - double* weighted_n_node_samples) except -1 nogil: + cdef intp_t node_reset(self, intp_t start, intp_t end, + float64_t* weighted_n_node_samples) except -1 nogil: """Reset splitter on node samples[start:end]. 
Returns -1 in case of failure to allocate memory (and raise MemoryError) @@ -137,11 +137,11 @@ cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): Parameters ---------- - start : SIZE_t + start : intp_t The index of the first sample to consider - end : SIZE_t + end : intp_t The index of the last sample to consider - weighted_n_node_samples : ndarray, dtype=double pointer + weighted_n_node_samples : ndarray, dtype=float64_t pointer The total weight of those samples """ # call parent reset @@ -154,35 +154,35 @@ cdef class UnsupervisedObliqueSplitter(UnsupervisedSplitter): return 0 cdef void sample_proj_mat(self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices) noexcept nogil: + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices) noexcept nogil: """ Sample the projection vector. This is a placeholder method. """ pass - cdef int pointer_size(self) noexcept nogil: + cdef intp_t pointer_size(self) noexcept nogil: """Get size of a pointer to record for ObliqueSplitter.""" return sizeof(ObliqueSplitRecord) cdef inline void compute_features_over_samples( self, - SIZE_t start, - SIZE_t end, - const SIZE_t[:] samples, - DTYPE_t[:] feature_values, - vector[DTYPE_t]* proj_vec_weights, # weights of the vector (max_features,) - vector[SIZE_t]* proj_vec_indices # indices of the features (max_features,) + intp_t start, + intp_t end, + const intp_t[:] samples, + float32_t[:] feature_values, + vector[float32_t]* proj_vec_weights, # weights of the vector (max_features,) + vector[intp_t]* proj_vec_indices # indices of the features (max_features,) ) noexcept nogil: """Compute the feature values for the samples[start:end] range. Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. """ - cdef SIZE_t idx, jdx - cdef SIZE_t col_idx - cdef DTYPE_t col_weight + cdef intp_t idx, jdx + cdef intp_t col_idx + cdef float32_t col_weight # Compute linear combination of features and then # sort samples according to the feature values. @@ -201,24 +201,24 @@ cdef class BestObliqueUnsupervisedSplitter(UnsupervisedObliqueSplitter): # NOTE: vectors are passed by value, so & is needed to pass by reference cdef void sample_proj_mat( self, - vector[vector[DTYPE_t]]& proj_mat_weights, - vector[vector[SIZE_t]]& proj_mat_indices + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil: """ Sparse Oblique Projection matrix. 
Randomly sample features to put in randomly sampled projection vectors weight = 1 or -1 with probability 0.5 """ - cdef SIZE_t n_features = self.n_features - cdef SIZE_t n_non_zeros = self.n_non_zeros + cdef intp_t n_features = self.n_features + cdef intp_t n_non_zeros = self.n_non_zeros cdef UINT32_t* random_state = &self.rand_r_state - cdef int i, feat_i, proj_i, rand_vec_index - cdef DTYPE_t weight + cdef intp_t i, feat_i, proj_i, rand_vec_index + cdef float32_t weight # construct an array to sample from mTry x n_features set of indices - cdef SIZE_t[::1] indices_to_sample = self.indices_to_sample - cdef SIZE_t grid_size = self.max_features * self.n_features + cdef intp_t[::1] indices_to_sample = self.indices_to_sample + cdef intp_t grid_size = self.max_features * self.n_features # shuffle indices over the 2D grid to sample using Fisher-Yates for i in range(0, grid_size): @@ -242,13 +242,13 @@ cdef class BestObliqueUnsupervisedSplitter(UnsupervisedObliqueSplitter): proj_mat_indices[proj_i].push_back(feat_i) # Store index of nonzero proj_mat_weights[proj_i].push_back(weight) # Store weight of nonzero - cdef int node_split( + cdef intp_t node_split( self, - double impurity, + float64_t impurity, SplitRecord* split, - SIZE_t* n_constant_features, - double lower_bound, - double upper_bound, + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound, ) except -1 nogil: """Find the best_split split on node samples[start:end] @@ -259,25 +259,25 @@ cdef class BestObliqueUnsupervisedSplitter(UnsupervisedObliqueSplitter): cdef ObliqueSplitRecord* oblique_split = (split) # Draw random splits and pick the best_split - cdef SIZE_t[::1] samples = self.samples - cdef SIZE_t start = self.start - cdef SIZE_t end = self.end + cdef intp_t[::1] samples = self.samples + cdef intp_t start = self.start + cdef intp_t end = self.end # pointer array to store feature values to split on - cdef DTYPE_t[::1] feature_values = self.feature_values - cdef SIZE_t max_features = self.max_features - cdef SIZE_t min_samples_leaf = self.min_samples_leaf - cdef double min_weight_leaf = self.min_weight_leaf + cdef float32_t[::1] feature_values = self.feature_values + cdef intp_t max_features = self.max_features + cdef intp_t min_samples_leaf = self.min_samples_leaf + cdef float64_t min_weight_leaf = self.min_weight_leaf # keep track of split record for current_split node and the best_split split # found among the sampled projection vectors cdef ObliqueSplitRecord best_split, current_split - cdef double current_proxy_improvement = -INFINITY - cdef double best_proxy_improvement = -INFINITY + cdef float64_t current_proxy_improvement = -INFINITY + cdef float64_t best_proxy_improvement = -INFINITY - cdef SIZE_t feat_i, p # index over computed features and start/end - cdef SIZE_t partition_end - cdef DTYPE_t temp_d # to compute a projection feature value + cdef intp_t feat_i, p # index over computed features and start/end + cdef intp_t partition_end + cdef float32_t temp_d # to compute a projection feature value # instantiate the split records _init_split(&best_split, end) diff --git a/sktree/tree/unsupervised/_unsup_oblique_tree.pxd b/sktree/tree/unsupervised/_unsup_oblique_tree.pxd index 6aa891cf5..b1277b869 100644 --- a/sktree/tree/unsupervised/_unsup_oblique_tree.pxd +++ b/sktree/tree/unsupervised/_unsup_oblique_tree.pxd @@ -12,40 +12,36 @@ cimport numpy as cnp from libcpp.vector cimport vector from ..._lib.sklearn.tree._splitter cimport SplitRecord -from ..._lib.sklearn.tree._tree cimport DOUBLE_t # Type of 
y, sample_weight -from ..._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from ..._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from ..._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from ..._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer from ..._lib.sklearn.tree._tree cimport Node +from ..._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t from .._oblique_splitter cimport ObliqueSplitRecord from ._unsup_tree cimport UnsupervisedTree cdef class UnsupervisedObliqueTree(UnsupervisedTree): - cdef vector[vector[DTYPE_t]] proj_vec_weights # (capacity, n_features) array of projection vectors - cdef vector[vector[SIZE_t]] proj_vec_indices # (capacity, n_features) array of projection vectors + cdef vector[vector[float32_t]] proj_vec_weights # (capacity, n_features) array of projection vectors + cdef vector[vector[intp_t]] proj_vec_indices # (capacity, n_features) array of projection vectors # overridden methods - cdef int _resize_c( + cdef intp_t _resize_c( self, - SIZE_t capacity=* + intp_t capacity=* ) except -1 nogil - cdef int _set_split_node( + cdef intp_t _set_split_node( self, SplitRecord* split_node, Node *node, - SIZE_t node_id, + intp_t node_id, ) nogil except -1 - cdef DTYPE_t _compute_feature( + cdef float32_t _compute_feature( self, - const DTYPE_t[:, :] X_ndarray, - SIZE_t sample_index, + const float32_t[:, :] X_ndarray, + intp_t sample_index, Node *node ) noexcept nogil cdef void _compute_feature_importances( self, - cnp.float64_t[:] importances, + float64_t[:] importances, Node* node ) noexcept nogil diff --git a/sktree/tree/unsupervised/_unsup_oblique_tree.pyx b/sktree/tree/unsupervised/_unsup_oblique_tree.pyx index 0ea70a6b6..51f6e6202 100644 --- a/sktree/tree/unsupervised/_unsup_oblique_tree.pyx +++ b/sktree/tree/unsupervised/_unsup_oblique_tree.pyx @@ -27,7 +27,7 @@ cdef Node dummy NODE_DTYPE = np.asarray((&dummy)).dtype # Mitigate precision differences between 32 bit and 64 bit -cdef DTYPE_t FEATURE_THRESHOLD = 1e-7 +cdef float32_t FEATURE_THRESHOLD = 1e-7 # ============================================================================= # ObliqueTree @@ -45,49 +45,49 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): Attributes ---------- - node_count : int + node_count : intp_t The number of nodes (internal nodes + leaves) in the tree. - capacity : int + capacity : intp_t The current capacity (i.e., size) of the arrays, which is at least as great as `node_count`. - max_depth : int + max_depth : intp_t The depth of the tree, i.e. the maximum depth of its leaves. - children_left : array of int, shape [node_count] + children_left : array of intp_t, shape [node_count] children_left[i] holds the node id of the left child of node i. For leaves, children_left[i] == TREE_LEAF. Otherwise, children_left[i] > i. This child handles the case where X[:, feature[i]] <= threshold[i]. - children_right : array of int, shape [node_count] + children_right : array of intp_t, shape [node_count] children_right[i] holds the node id of the right child of node i. For leaves, children_right[i] == TREE_LEAF. Otherwise, children_right[i] > i. This child handles the case where X[:, feature[i]] > threshold[i]. - feature : array of int, shape [node_count] + feature : array of intp_t, shape [node_count] feature[i] holds the feature to split on, for the internal node i. 
- threshold : array of double, shape [node_count] + threshold : array of float64_t, shape [node_count] threshold[i] holds the threshold for the internal node i. - value : array of double, shape [node_count, n_outputs, max_n_classes] + value : array of float64_t, shape [node_count, n_outputs, max_n_classes] Contains the constant prediction value of each node. - impurity : array of double, shape [node_count] + impurity : array of float64_t, shape [node_count] impurity[i] holds the impurity (i.e., the value of the splitting criterion) at node i. - n_node_samples : array of int, shape [node_count] + n_node_samples : array of intp_t, shape [node_count] n_node_samples[i] holds the number of training samples reaching node i. - weighted_n_node_samples : array of int, shape [node_count] + weighted_n_node_samples : array of intp_t, shape [node_count] weighted_n_node_samples[i] holds the weighted number of training samples reaching node i. """ - def __cinit__(self, int n_features): + def __cinit__(self, intp_t n_features): """Constructor.""" # Input/Output layout self.n_features = n_features @@ -100,8 +100,8 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): self.value = NULL self.nodes = NULL - self.proj_vec_weights = vector[vector[DTYPE_t]](self.capacity) - self.proj_vec_indices = vector[vector[SIZE_t]](self.capacity) + self.proj_vec_weights = vector[vector[float32_t]](self.capacity) + self.proj_vec_indices = vector[vector[intp_t]](self.capacity) def __reduce__(self): """Reduce re-implementation, for pickling.""" @@ -159,7 +159,7 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): memcpy(self.nodes, cnp.PyArray_DATA(node_ndarray), self.capacity * sizeof(Node)) memcpy(self.value, cnp.PyArray_DATA(value_ndarray), - self.capacity * self.value_stride * sizeof(double)) + self.capacity * self.value_stride * sizeof(float64_t)) cpdef cnp.ndarray get_projection_matrix(self): """Get the projection matrix of shape (node_count, n_features).""" @@ -171,7 +171,7 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): proj_vecs[i, feat] = weight return proj_vecs - cdef int _resize_c(self, SIZE_t capacity=SIZE_MAX) except -1 nogil: + cdef intp_t _resize_c(self, intp_t capacity=SIZE_MAX) except -1 nogil: """Guts of _resize. Additionally resizes the projection indices and weights. @@ -200,7 +200,7 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): # value memory is initialised to 0 to enable classifier argmax if capacity > self.capacity: memset((self.value + self.capacity * self.value_stride), 0, - (capacity - self.capacity) * self.value_stride * sizeof(double)) + (capacity - self.capacity) * self.value_stride * sizeof(float64_t)) # if capacity smaller than node_count, adjust the counter if capacity < self.node_count: @@ -209,7 +209,7 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): self.capacity = capacity return 0 - cdef int _set_split_node(self, SplitRecord* split_node, Node *node, SIZE_t node_id) except -1 nogil: + cdef intp_t _set_split_node(self, SplitRecord* split_node, Node *node, intp_t node_id) except -1 nogil: """Set node data. """ # Cython type cast split record into its inherited split record @@ -230,10 +230,10 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): ) return 1 - cdef DTYPE_t _compute_feature( + cdef float32_t _compute_feature( self, - const DTYPE_t[:, :] X_ndarray, - SIZE_t sample_index, + const float32_t[:, :] X_ndarray, + intp_t sample_index, Node *node ) noexcept nogil: """Compute feature from a given data matrix, X. 
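[Editor's note, not part of the patch] As the docstring above says, ``_compute_feature`` for an oblique node evaluates a sparse linear combination of the sample's columns, using the per-node ``proj_vec_indices`` / ``proj_vec_weights`` pair that ``sample_proj_mat`` fills with +1/-1 weights. A minimal Python sketch of that evaluation, illustrative only and not the Cython implementation:

import numpy as np

X_row = np.array([0.5, -1.2, 3.0, 0.7], dtype=np.float32)  # one sample
proj_vec_indices = [0, 2]       # columns participating in this node's split
proj_vec_weights = [1.0, -1.0]  # weights drawn as +1 or -1 with equal probability

proj_feat = 0.0
for j, w in zip(proj_vec_indices, proj_vec_weights):
    proj_feat += w * X_row[j]

# proj_feat (here 0.5 - 3.0 = -2.5) is the value compared against the node threshold.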
@@ -241,15 +241,15 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): In oblique-aligned trees, this is the projection of X. In this case, we take a simple linear combination of some columns of X. """ - cdef DTYPE_t proj_feat = 0.0 - cdef DTYPE_t weight = 0.0 - cdef SIZE_t j = 0 - cdef SIZE_t feature_index + cdef float32_t proj_feat = 0.0 + cdef float32_t weight = 0.0 + cdef intp_t j = 0 + cdef intp_t feature_index # get the index of the node - cdef SIZE_t node_id = node - self.nodes + cdef intp_t node_id = node - self.nodes - # cdef SIZE_t n_projections = proj_vec_indices.size() + # cdef intp_t n_projections = proj_vec_indices.size() # compute projection of the data based on trained tree # proj_vec_weights = self.proj_vec_weights[node_id] # proj_vec_indices = self.proj_vec_indices[node_id] @@ -266,7 +266,7 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): cdef void _compute_feature_importances( self, - cnp.float64_t[:] importances, + float64_t[:] importances, Node* node ) noexcept nogil: """Compute feature importances from a Node in the Tree. @@ -279,13 +279,13 @@ cdef class UnsupervisedObliqueTree(UnsupervisedTree): cdef Node* right # get the index of the node - cdef SIZE_t node_id = node - self.nodes + cdef intp_t node_id = node - self.nodes left = &nodes[node.left_child] right = &nodes[node.right_child] - cdef int i, feature_index - cdef DTYPE_t weight + cdef intp_t i, feature_index + cdef float32_t weight for i in range(0, self.proj_vec_indices[node_id].size()): feature_index = self.proj_vec_indices[node_id][i] weight = self.proj_vec_weights[node_id][i] diff --git a/sktree/tree/unsupervised/_unsup_splitter.pxd b/sktree/tree/unsupervised/_unsup_splitter.pxd index 324d87445..702a69459 100644 --- a/sktree/tree/unsupervised/_unsup_splitter.pxd +++ b/sktree/tree/unsupervised/_unsup_splitter.pxd @@ -1,9 +1,6 @@ from ..._lib.sklearn.tree._splitter cimport BaseSplitter, SplitRecord -from ..._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from ..._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from ..._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from ..._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from ..._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer +from ..._lib.sklearn.tree._utils cimport UINT32_t +from ..._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t from ._unsup_criterion cimport UnsupervisedCriterion @@ -23,35 +20,35 @@ cdef class UnsupervisedSplitter(BaseSplitter): # XXX: requires BaseSplitter to not define "criterion" cdef public UnsupervisedCriterion criterion # criterion computer - cdef const DTYPE_t[:, :] X # feature matrix - cdef SIZE_t n_total_samples # store the total number of samples + cdef const float32_t[:, :] X # feature matrix + cdef intp_t n_total_samples # store the total number of samples # Initialization method for unsupervised splitters - cdef int init( + cdef intp_t init( self, - const DTYPE_t[:, :] X, - const DOUBLE_t[:] sample_weight + const float32_t[:, :] X, + const float64_t[:] sample_weight ) except -1 # Overridden Methods from base class - cdef int node_reset( + cdef intp_t node_reset( self, - SIZE_t start, - SIZE_t end, - double* weighted_n_node_samples + intp_t start, + intp_t end, + float64_t* weighted_n_node_samples ) except -1 nogil - cdef int node_split( + cdef intp_t node_split( self, - double impurity, # Impurity of the node + float64_t impurity, # Impurity of the node SplitRecord* split, - SIZE_t* n_constant_features, - double 
lower_bound, - double upper_bound + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound ) except -1 nogil cdef void node_value( self, - double* dest + float64_t* dest ) noexcept nogil - cdef double node_impurity( + cdef float64_t node_impurity( self ) noexcept nogil diff --git a/sktree/tree/unsupervised/_unsup_splitter.pyx b/sktree/tree/unsupervised/_unsup_splitter.pyx index be97ea246..df48394a0 100644 --- a/sktree/tree/unsupervised/_unsup_splitter.pyx +++ b/sktree/tree/unsupervised/_unsup_splitter.pyx @@ -16,16 +16,16 @@ from sklearn.tree._utils cimport RAND_R_MAX, rand_int from .._sklearn_splitter cimport sort -cdef double INFINITY = np.inf +cdef float64_t INFINITY = np.inf # Mitigate precision differences between 32 bit and 64 bit -cdef DTYPE_t FEATURE_THRESHOLD = 1e-7 +cdef float32_t FEATURE_THRESHOLD = 1e-7 # Constant to switch between algorithm non zero value extract algorithm # in SparseSplitter -cdef DTYPE_t EXTRACT_NNZ_SWITCH = 0.1 +cdef float32_t EXTRACT_NNZ_SWITCH = 0.1 -cdef inline void _init_split(SplitRecord* self, SIZE_t start_pos) noexcept nogil: +cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil: self.impurity_left = INFINITY self.impurity_right = INFINITY self.pos = start_pos @@ -37,8 +37,8 @@ cdef inline void _init_split(SplitRecord* self, SIZE_t start_pos) noexcept nogil cdef class UnsupervisedSplitter(BaseSplitter): """Base class for unsupervised splitters.""" - def __cinit__(self, UnsupervisedCriterion criterion, SIZE_t max_features, - SIZE_t min_samples_leaf, double min_weight_leaf, + def __cinit__(self, UnsupervisedCriterion criterion, intp_t max_features, + intp_t min_samples_leaf, float64_t min_weight_leaf, object random_state, *argv): """ Parameters @@ -46,16 +46,16 @@ cdef class UnsupervisedSplitter(BaseSplitter): criterion : Criterion The criterion to measure the quality of a split. - max_features : SIZE_t + max_features : intp_t The maximal number of randomly selected features which can be considered for a split. - min_samples_leaf : SIZE_t + min_samples_leaf : intp_t The minimal number of samples each leaf can have, where splits which would result in having less samples in a leaf are not considered. - min_weight_leaf : double + min_weight_leaf : float64_t The minimal weight each leaf can have, where the weight is the sum of the weights of each sample in it. 
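[Editor's note, not part of the patch] The ``min_samples_leaf`` and ``min_weight_leaf`` parameters documented above gate which split positions are admissible. The sketch below is a hedged illustration of that check, not the library's API; the function name, signature, and the assumption that the weight array is already ordered to match ``samples[start:end]`` are all mine.

def split_is_allowed(pos, start, end, node_weights, min_samples_leaf, min_weight_leaf):
    # samples[start:pos] go to the left child, samples[pos:end] to the right child
    n_left, n_right = pos - start, end - pos
    w_left = sum(node_weights[start:pos])
    w_right = sum(node_weights[pos:end])
    return (
        n_left >= min_samples_leaf
        and n_right >= min_samples_leaf
        and w_left >= min_weight_leaf
        and w_right >= min_weight_leaf
    )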
@@ -79,21 +79,21 @@ cdef class UnsupervisedSplitter(BaseSplitter): self.min_weight_leaf, self.random_state), self.__getstate__()) - cdef int init( + cdef intp_t init( self, - const DTYPE_t[:, :] X, - const DOUBLE_t[:] sample_weight + const float32_t[:, :] X, + const float64_t[:] sample_weight ) except -1: self.rand_r_state = self.random_state.randint(0, RAND_R_MAX) - cdef SIZE_t n_samples = X.shape[0] + cdef intp_t n_samples = X.shape[0] # Create a new array which will be used to store nonzero # samples from the feature of interest self.samples = np.empty(n_samples, dtype=np.intp) - cdef SIZE_t[::1] samples = self.samples + cdef intp_t[::1] samples = self.samples - cdef SIZE_t i, j - cdef double weighted_n_samples = 0.0 + cdef intp_t i, j + cdef float64_t weighted_n_samples = 0.0 j = 0 for i in range(n_samples): @@ -111,7 +111,7 @@ cdef class UnsupervisedSplitter(BaseSplitter): self.n_samples = j self.weighted_n_samples = weighted_n_samples - cdef SIZE_t n_features = X.shape[1] + cdef intp_t n_features = X.shape[1] self.features = np.arange(n_features, dtype=np.intp) self.n_features = n_features @@ -136,8 +136,8 @@ cdef class UnsupervisedSplitter(BaseSplitter): ) return 0 - cdef int node_reset(self, SIZE_t start, SIZE_t end, - double* weighted_n_node_samples) except -1 nogil: + cdef intp_t node_reset(self, intp_t start, intp_t end, + float64_t* weighted_n_node_samples) except -1 nogil: """Reset splitter on node samples[start:end]. Returns -1 in case of failure to allocate memory (and raise MemoryError) @@ -145,11 +145,11 @@ cdef class UnsupervisedSplitter(BaseSplitter): Parameters ---------- - start : SIZE_t + start : intp_t The index of the first sample to consider - end : SIZE_t + end : intp_t The index of the last sample to consider - weighted_n_node_samples : ndarray, dtype=double pointer + weighted_n_node_samples : ndarray, dtype=float64_t pointer The total weight of those samples """ @@ -161,12 +161,12 @@ cdef class UnsupervisedSplitter(BaseSplitter): weighted_n_node_samples[0] = self.criterion.weighted_n_node_samples return 0 - cdef void node_value(self, double* dest) noexcept nogil: + cdef void node_value(self, float64_t* dest) noexcept nogil: """Copy the value of node samples[start:end] into dest.""" self.criterion.node_value(dest) - cdef double node_impurity(self) noexcept nogil: + cdef float64_t node_impurity(self) noexcept nogil: """Return the impurity of the current_split node.""" return self.criterion.node_impurity() @@ -174,13 +174,13 @@ cdef class UnsupervisedSplitter(BaseSplitter): cdef class BestUnsupervisedSplitter(UnsupervisedSplitter): """Split in a best_split-first fashion. """ - cdef int node_split( + cdef intp_t node_split( self, - double impurity, + float64_t impurity, SplitRecord* split, - SIZE_t* n_constant_features, - double lower_bound, - double upper_bound + intp_t* n_constant_features, + float64_t lower_bound, + float64_t upper_bound ) except -1 nogil: """Find the best_split split on node samples[start:end]. @@ -195,40 +195,40 @@ cdef class BestUnsupervisedSplitter(UnsupervisedSplitter): be cimportable. 
""" # Find the best_split split - cdef SIZE_t[::1] samples = self.samples - cdef SIZE_t start = self.start - cdef SIZE_t end = self.end + cdef intp_t[::1] samples = self.samples + cdef intp_t start = self.start + cdef intp_t end = self.end - cdef SIZE_t[::1] features = self.features - cdef SIZE_t[::1] constant_features = self.constant_features - cdef SIZE_t n_features = self.n_features + cdef intp_t[::1] features = self.features + cdef intp_t[::1] constant_features = self.constant_features + cdef intp_t n_features = self.n_features - cdef DTYPE_t[::1] feature_values = self.feature_values - cdef SIZE_t max_features = self.max_features - cdef SIZE_t min_samples_leaf = self.min_samples_leaf + cdef float32_t[::1] feature_values = self.feature_values + cdef intp_t max_features = self.max_features + cdef intp_t min_samples_leaf = self.min_samples_leaf cdef UINT32_t* random_state = &self.rand_r_state # XXX: maybe need to rename to something else - cdef double min_weight_leaf = self.min_weight_leaf + cdef float64_t min_weight_leaf = self.min_weight_leaf cdef SplitRecord best_split, current_split - cdef double current_proxy_improvement = -INFINITY - cdef double best_proxy_improvement = -INFINITY + cdef float64_t current_proxy_improvement = -INFINITY + cdef float64_t best_proxy_improvement = -INFINITY - cdef SIZE_t f_i = n_features - cdef SIZE_t f_j - cdef SIZE_t p - cdef SIZE_t i + cdef intp_t f_i = n_features + cdef intp_t f_j + cdef intp_t p + cdef intp_t i - cdef SIZE_t n_visited_features = 0 + cdef intp_t n_visited_features = 0 # Number of features discovered to be constant during the split search - cdef SIZE_t n_found_constants = 0 + cdef intp_t n_found_constants = 0 # Number of features known to be constant and drawn without replacement - cdef SIZE_t n_drawn_constants = 0 - cdef SIZE_t n_known_constants = n_constant_features[0] + cdef intp_t n_drawn_constants = 0 + cdef intp_t n_known_constants = n_constant_features[0] # n_total_constants = n_known_constants + n_found_constants - cdef SIZE_t n_total_constants = n_known_constants - cdef SIZE_t partition_end + cdef intp_t n_total_constants = n_known_constants + cdef intp_t partition_end _init_split(&best_split, end) @@ -372,12 +372,12 @@ cdef class BestUnsupervisedSplitter(UnsupervisedSplitter): # Respect invariant for constant features: the original order of # element in features[:n_known_constants] must be preserved for sibling # and child nodes - memcpy(&features[0], &constant_features[0], sizeof(SIZE_t) * n_known_constants) + memcpy(&features[0], &constant_features[0], sizeof(intp_t) * n_known_constants) # Copy newly found constant features memcpy(&constant_features[n_known_constants], &features[n_known_constants], - sizeof(SIZE_t) * n_found_constants) + sizeof(intp_t) * n_found_constants) # Return values split[0] = best_split diff --git a/sktree/tree/unsupervised/_unsup_tree.pxd b/sktree/tree/unsupervised/_unsup_tree.pxd index 329766812..fe4a4630c 100644 --- a/sktree/tree/unsupervised/_unsup_tree.pxd +++ b/sktree/tree/unsupervised/_unsup_tree.pxd @@ -11,12 +11,8 @@ import numpy as np cimport numpy as cnp from ..._lib.sklearn.tree._splitter cimport SplitRecord -from ..._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight -from ..._lib.sklearn.tree._tree cimport DTYPE_t # Type of X -from ..._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer -from ..._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters -from ..._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer from 
..._lib.sklearn.tree._tree cimport BaseTree, Node +from ..._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t from ._unsup_splitter cimport UnsupervisedSplitter @@ -28,27 +24,27 @@ cdef class UnsupervisedTree(BaseTree): # # Inner structures: values are stored separately from node structure, # since size is determined at runtime. - # cdef double* value # (capacity) array of values - # cdef SIZE_t value_stride # = 1 + # cdef float64_t* value # (capacity) array of values + # cdef intp_t value_stride # = 1 # Input/Output layout - cdef public SIZE_t n_features # Number of features in X + cdef public intp_t n_features # Number of features in X # Methods cdef cnp.ndarray _get_value_ndarray(self) cdef cnp.ndarray _get_node_ndarray(self) # Overridden Methods - cdef int _set_split_node( + cdef intp_t _set_split_node( self, SplitRecord* split_node, Node* node, - SIZE_t node_id + intp_t node_id ) except -1 nogil - cdef DTYPE_t _compute_feature( + cdef float32_t _compute_feature( self, - const DTYPE_t[:, :] X_ndarray, - SIZE_t sample_index, + const float32_t[:, :] X_ndarray, + intp_t sample_index, Node *node ) noexcept nogil cdef void _compute_feature_importances( @@ -71,20 +67,20 @@ cdef class UnsupervisedTreeBuilder: cdef UnsupervisedSplitter splitter # Splitting algorithm - cdef SIZE_t min_samples_split # Minimum number of samples in an internal node - cdef SIZE_t min_samples_leaf # Minimum number of samples in a leaf - cdef double min_weight_leaf # Minimum weight in a leaf - cdef SIZE_t max_depth # Maximal tree depth - cdef double min_impurity_decrease # Impurity threshold for early stopping + cdef intp_t min_samples_split # Minimum number of samples in an internal node + cdef intp_t min_samples_leaf # Minimum number of samples in a leaf + cdef float64_t min_weight_leaf # Minimum weight in a leaf + cdef intp_t max_depth # Maximal tree depth + cdef float64_t min_impurity_decrease # Impurity threshold for early stopping cpdef build( self, UnsupervisedTree tree, object X, - const DOUBLE_t[:] sample_weight=* + const float64_t[:] sample_weight=* ) cdef _check_input( self, object X, - const DOUBLE_t[:] sample_weight + const float64_t[:] sample_weight ) diff --git a/sktree/tree/unsupervised/_unsup_tree.pyx b/sktree/tree/unsupervised/_unsup_tree.pyx index df4217ce7..491a2f920 100644 --- a/sktree/tree/unsupervised/_unsup_tree.pyx +++ b/sktree/tree/unsupervised/_unsup_tree.pyx @@ -29,10 +29,10 @@ cnp.import_array() cdef extern from "numpy/arrayobject.h": object PyArray_NewFromDescr(PyTypeObject* subtype, cnp.dtype descr, - int nd, cnp.npy_intp* dims, + intp_t nd, cnp.npy_intp* dims, cnp.npy_intp* strides, - void* data, int flags, object obj) - int PyArray_SetBaseObject(cnp.ndarray arr, PyObject* obj) + void* data, intp_t flags, object obj) + intp_t PyArray_SetBaseObject(cnp.ndarray arr, PyObject* obj) cdef extern from "" namespace "std" nogil: cdef cppclass stack[T]: @@ -51,19 +51,19 @@ from numpy import float32 as DTYPE from numpy import float64 as DOUBLE -cdef double INFINITY = np.inf -cdef double EPSILON = np.finfo('double').eps +cdef float64_t INFINITY = np.inf +cdef float64_t EPSILON = np.finfo('double').eps # Some handy constants (BestFirstTreeBuilder) -cdef int IS_FIRST = 1 -cdef int IS_NOT_FIRST = 0 -cdef int IS_LEFT = 1 -cdef int IS_NOT_LEFT = 0 +cdef intp_t IS_FIRST = 1 +cdef intp_t IS_NOT_FIRST = 0 +cdef intp_t IS_LEFT = 1 +cdef intp_t IS_NOT_LEFT = 0 TREE_LEAF = -1 TREE_UNDEFINED = -2 -cdef SIZE_t _TREE_LEAF = TREE_LEAF -cdef SIZE_t _TREE_UNDEFINED = TREE_UNDEFINED +cdef intp_t 
_TREE_LEAF = TREE_LEAF +cdef intp_t _TREE_UNDEFINED = TREE_UNDEFINED # Build the corresponding numpy dtype for Node. # This works by casting `dummy` to an array of Node of length 1, which numpy @@ -83,7 +83,7 @@ cdef class UnsupervisedTreeBuilder: self, UnsupervisedTree tree, object X, - const DOUBLE_t[:] sample_weight=None + const float64_t[:] sample_weight=None ): """Build a decision tree from the training set X.""" pass @@ -91,7 +91,7 @@ cdef class UnsupervisedTreeBuilder: cdef inline _check_input( self, object X, - const DOUBLE_t[:] sample_weight, + const float64_t[:] sample_weight, ): """Check input dtype, layout and format""" if issparse(X): @@ -121,27 +121,27 @@ cdef struct FrontierRecord: # Record of information of a Node, the frontier for a split. Those records are # maintained in a heap to access the Node with the best improvement in impurity, # allowing growing trees greedily on this improvement. - SIZE_t node_id - SIZE_t start - SIZE_t end - SIZE_t pos - SIZE_t depth + intp_t node_id + intp_t start + intp_t end + intp_t pos + intp_t depth bint is_leaf - double impurity - double impurity_left - double impurity_right - double improvement + float64_t impurity + float64_t impurity_left + float64_t impurity_right + float64_t improvement # Depth first builder --------------------------------------------------------- # A record on the stack for depth-first tree growing cdef struct StackRecord: - SIZE_t start - SIZE_t end - SIZE_t depth - SIZE_t parent + intp_t start + intp_t end + intp_t depth + intp_t parent bint is_left - double impurity - SIZE_t n_constant_features + float64_t impurity + intp_t n_constant_features cdef inline bool _compare_records( const FrontierRecord& left, @@ -164,17 +164,17 @@ cdef class UnsupervisedBestFirstTreeBuilder(UnsupervisedTreeBuilder): The best node to expand is given by the node at the frontier that has the highest impurity improvement. 
""" - cdef SIZE_t max_leaf_nodes + cdef intp_t max_leaf_nodes def __cinit__( self, UnsupervisedSplitter splitter, - SIZE_t min_samples_split, - SIZE_t min_samples_leaf, - double min_weight_leaf, - SIZE_t max_depth, - SIZE_t max_leaf_nodes, - double min_impurity_decrease + intp_t min_samples_split, + intp_t min_samples_leaf, + float64_t min_weight_leaf, + intp_t max_depth, + intp_t max_leaf_nodes, + float64_t min_impurity_decrease ): self.splitter = splitter self.min_samples_split = min_samples_split @@ -188,7 +188,7 @@ cdef class UnsupervisedBestFirstTreeBuilder(UnsupervisedTreeBuilder): self, UnsupervisedTree tree, object X, - const DOUBLE_t[:] sample_weight=None + const float64_t[:] sample_weight=None ): """Build a decision tree from the training set X.""" # check input @@ -196,7 +196,7 @@ cdef class UnsupervisedBestFirstTreeBuilder(UnsupervisedTreeBuilder): # Parameters cdef UnsupervisedSplitter splitter = self.splitter - cdef SIZE_t max_leaf_nodes = self.max_leaf_nodes + cdef intp_t max_leaf_nodes = self.max_leaf_nodes # Recursive partition (without actual recursion) splitter.init(X, sample_weight) @@ -206,15 +206,15 @@ cdef class UnsupervisedBestFirstTreeBuilder(UnsupervisedTreeBuilder): cdef FrontierRecord split_node_left cdef FrontierRecord split_node_right - cdef SIZE_t n_node_samples = splitter.n_samples - cdef SIZE_t max_split_nodes = max_leaf_nodes - 1 + cdef intp_t n_node_samples = splitter.n_samples + cdef intp_t max_split_nodes = max_leaf_nodes - 1 cdef bint is_leaf - cdef SIZE_t max_depth_seen = -1 - cdef int rc = 0 + cdef intp_t max_depth_seen = -1 + cdef intp_t rc = 0 cdef Node* node # Initial capacity - cdef SIZE_t init_capacity = max_split_nodes + max_leaf_nodes + cdef intp_t init_capacity = max_split_nodes + max_leaf_nodes tree._resize(init_capacity) with nogil: @@ -285,17 +285,17 @@ cdef class UnsupervisedBestFirstTreeBuilder(UnsupervisedTreeBuilder): if rc == -1: raise MemoryError() - cdef inline int _add_split_node( + cdef inline intp_t _add_split_node( self, UnsupervisedSplitter splitter, UnsupervisedTree tree, - SIZE_t start, - SIZE_t end, - double impurity, + intp_t start, + intp_t end, + float64_t impurity, bint is_first, bint is_left, Node* parent, - SIZE_t depth, + intp_t depth, FrontierRecord* res ) except -1 nogil: """Adds node w/ partition ``[start, end)`` to the frontier. 
""" @@ -305,11 +305,11 @@ cdef class UnsupervisedBestFirstTreeBuilder(UnsupervisedTreeBuilder): cdef SplitRecord split cdef SplitRecord* split_ptr = malloc(splitter.pointer_size()) - cdef SIZE_t node_id - cdef SIZE_t n_node_samples - cdef SIZE_t n_constant_features = 0 - cdef double min_impurity_decrease = self.min_impurity_decrease - cdef double weighted_n_node_samples + cdef intp_t node_id + cdef intp_t n_node_samples + cdef intp_t n_constant_features = 0 + cdef float64_t min_impurity_decrease = self.min_impurity_decrease + cdef float64_t weighted_n_node_samples cdef bint is_leaf splitter.node_reset(start, end, &weighted_n_node_samples) @@ -382,11 +382,11 @@ cdef class UnsupervisedDepthFirstTreeBuilder(UnsupervisedTreeBuilder): def __cinit__( self, UnsupervisedSplitter splitter, - SIZE_t min_samples_split, - SIZE_t min_samples_leaf, - double min_weight_leaf, - SIZE_t max_depth, - double min_impurity_decrease + intp_t min_samples_split, + intp_t min_samples_leaf, + float64_t min_weight_leaf, + intp_t max_depth, + float64_t min_impurity_decrease ): self.splitter = splitter self.min_samples_split = min_samples_split @@ -399,7 +399,7 @@ cdef class UnsupervisedDepthFirstTreeBuilder(UnsupervisedTreeBuilder): self, UnsupervisedTree tree, object X, - const DOUBLE_t[:] sample_weight=None + const float64_t[:] sample_weight=None ): """Build a decision tree from the training set (X, y).""" @@ -407,10 +407,10 @@ cdef class UnsupervisedDepthFirstTreeBuilder(UnsupervisedTreeBuilder): X, sample_weight = self._check_input(X, sample_weight) # Initial capacity - cdef int init_capacity + cdef intp_t init_capacity if tree.max_depth <= 10: - init_capacity = (2 ** (tree.max_depth + 1)) - 1 + init_capacity = (2 ** (tree.max_depth + 1)) - 1 else: init_capacity = 2047 @@ -418,23 +418,23 @@ cdef class UnsupervisedDepthFirstTreeBuilder(UnsupervisedTreeBuilder): # Parameters cdef UnsupervisedSplitter splitter = self.splitter - cdef SIZE_t max_depth = self.max_depth - cdef SIZE_t min_samples_leaf = self.min_samples_leaf - cdef double min_weight_leaf = self.min_weight_leaf - cdef SIZE_t min_samples_split = self.min_samples_split - cdef double min_impurity_decrease = self.min_impurity_decrease + cdef intp_t max_depth = self.max_depth + cdef intp_t min_samples_leaf = self.min_samples_leaf + cdef float64_t min_weight_leaf = self.min_weight_leaf + cdef intp_t min_samples_split = self.min_samples_split + cdef float64_t min_impurity_decrease = self.min_impurity_decrease # Recursive partition (without actual recursion) splitter.init(X, sample_weight) - cdef SIZE_t start - cdef SIZE_t end - cdef SIZE_t depth - cdef SIZE_t parent + cdef intp_t start + cdef intp_t end + cdef intp_t depth + cdef intp_t parent cdef bint is_left - cdef SIZE_t n_node_samples = splitter.n_samples - cdef double weighted_n_node_samples - cdef SIZE_t node_id + cdef intp_t n_node_samples = splitter.n_samples + cdef float64_t weighted_n_node_samples + cdef intp_t node_id # initialize record to keep track of split node data and a pointer to the # memory address containing the split node @@ -442,12 +442,12 @@ cdef class UnsupervisedDepthFirstTreeBuilder(UnsupervisedTreeBuilder): cdef SplitRecord split cdef SplitRecord* split_ptr = malloc(splitter.pointer_size()) - cdef double impurity = INFINITY - cdef SIZE_t n_constant_features + cdef float64_t impurity = INFINITY + cdef intp_t n_constant_features cdef bint is_leaf cdef bint first = 1 - cdef SIZE_t max_depth_seen = -1 - cdef int rc = 0 + cdef intp_t max_depth_seen = -1 + cdef intp_t rc = 0 cdef 
stack[StackRecord] builder_stack cdef StackRecord stack_record @@ -572,45 +572,45 @@ cdef class UnsupervisedTree(BaseTree): Attributes ---------- - node_count : int + node_count : intp_t The number of nodes (internal nodes + leaves) in the tree. - capacity : int + capacity : intp_t The current capacity (i.e., size) of the arrays, which is at least as great as `node_count`. - max_depth : int + max_depth : intp_t The depth of the tree, i.e. the maximum depth of its leaves. - children_left : array of int, shape [node_count] + children_left : array of intp_t, shape [node_count] children_left[i] holds the node id of the left child of node i. For leaves, children_left[i] == TREE_LEAF. Otherwise, children_left[i] > i. This child handles the case where X[:, feature[i]] <= threshold[i]. - children_right : array of int, shape [node_count] + children_right : array of intp_t, shape [node_count] children_right[i] holds the node id of the right child of node i. For leaves, children_right[i] == TREE_LEAF. Otherwise, children_right[i] > i. This child handles the case where X[:, feature[i]] > threshold[i]. - feature : array of int, shape [node_count] + feature : array of intp_t, shape [node_count] feature[i] holds the feature to split on, for the internal node i. - threshold : array of double, shape [node_count] + threshold : array of float64_t, shape [node_count] threshold[i] holds the threshold for the internal node i. - value : array of double, shape [node_count] + value : array of float64_t, shape [node_count] Contains the constant prediction value of each node. - impurity : array of double, shape [node_count] + impurity : array of float64_t, shape [node_count] impurity[i] holds the impurity (i.e., the value of the splitting criterion) at node i. - n_node_samples : array of int, shape [node_count] + n_node_samples : array of intp_t, shape [node_count] n_node_samples[i] holds the number of training samples reaching node i. - weighted_n_node_samples : array of double, shape [node_count] + weighted_n_node_samples : array of float64_t, shape [node_count] weighted_n_node_samples[i] holds the weighted number of training samples reaching node i. """ @@ -655,7 +655,7 @@ cdef class UnsupervisedTree(BaseTree): def value(self): return self._get_value_ndarray()[:self.node_count] - def __cinit__(self, int n_features): + def __cinit__(self, intp_t n_features): """Constructor.""" # Input/Output layout self.n_features = n_features @@ -718,7 +718,7 @@ cdef class UnsupervisedTree(BaseTree): memcpy(self.nodes, cnp.PyArray_DATA(node_ndarray), self.capacity * sizeof(Node)) memcpy(self.value, cnp.PyArray_DATA(value_ndarray), - self.capacity * self.value_stride * sizeof(double)) + self.capacity * self.value_stride * sizeof(float64_t)) cdef cnp.ndarray _get_value_ndarray(self): """Wraps value as a 3-d NumPy array. 
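[Editor's note, not part of the patch] The attribute documentation above (``children_left``, ``children_right``, ``feature``, ``threshold``) describes the flat arrays that encode a fitted tree. A small illustrative traversal for the axis-aligned case follows; oblique nodes would replace ``x[feature[node]]`` with the projected value produced by ``_compute_feature``.

TREE_LEAF = -1

def apply_one(x, children_left, children_right, feature, threshold):
    node = 0  # start at the root
    while children_left[node] != TREE_LEAF:
        if x[feature[node]] <= threshold[node]:
            node = children_left[node]
        else:
            node = children_right[node]
    return node  # id of the leaf that sample x reaches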
@@ -784,12 +784,12 @@ def _check_value_ndarray(value_ndarray, expected_dtype, expected_shape): ) -def _dtype_to_dict(dtype): +def _float32_to_dict(dtype): return {name: dt.str for name, (dt, *rest) in dtype.fields.items()} def _dtype_dict_with_modified_bitness(dtype_dict): - # field names in Node struct with SIZE_t types (see sklearn/tree/_tree.pxd) + # field names in Node struct with intp_t types (see sklearn/tree/_tree.pxd) indexing_field_names = ["left_child", "right_child", "feature", "n_node_samples"] expected_dtype_size = str(struct.calcsize("P")) @@ -805,7 +805,7 @@ def _dtype_dict_with_modified_bitness(dtype_dict): def _all_compatible_dtype_dicts(dtype): - # The Cython code for decision trees uses platform-specific SIZE_t + # The Cython code for decision trees uses platform-specific intp_t # typed indexing fields that correspond to either i4 or i8 dtypes for # the matching fields in the numpy array depending on the bitness of # the platform (32 bit or 64 bit respectively). @@ -821,9 +821,9 @@ def _all_compatible_dtype_dicts(dtype): # saved can have a different endianness than the machine where the pickle # is loaded - dtype_dict = _dtype_to_dict(dtype) + dtype_dict = _float32_to_dict(dtype) dtype_dict_with_modified_bitness = _dtype_dict_with_modified_bitness(dtype_dict) - dtype_dict_with_modified_endianness = _dtype_to_dict(dtype.newbyteorder()) + dtype_dict_with_modified_endianness = _float32_to_dict(dtype.newbyteorder()) dtype_dict_with_modified_bitness_and_endianness = _dtype_dict_with_modified_bitness( dtype_dict_with_modified_endianness ) @@ -852,7 +852,7 @@ def _check_node_ndarray(node_ndarray, expected_dtype): if node_ndarray_dtype == expected_dtype: return node_ndarray - node_ndarray_dtype_dict = _dtype_to_dict(node_ndarray_dtype) + node_ndarray_dtype_dict = _float32_to_dict(node_ndarray_dtype) all_compatible_dtype_dicts = _all_compatible_dtype_dicts(expected_dtype) if node_ndarray_dtype_dict not in all_compatible_dtype_dicts: From ce36d7bae71222fc1c7368060fc23c99a636c862 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 12 Oct 2023 15:47:19 -0400 Subject: [PATCH 5/9] Adding multiview dtc example Signed-off-by: Adam Li --- examples/multiview/plot_multiview_dtc.py | 83 ++++++++++++------- .../plot_multiview_axis_aligned_splitter.py | 2 +- 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/examples/multiview/plot_multiview_dtc.py b/examples/multiview/plot_multiview_dtc.py index 84089547e..37629093c 100644 --- a/examples/multiview/plot_multiview_dtc.py +++ b/examples/multiview/plot_multiview_dtc.py @@ -3,34 +3,40 @@ Analyze a multi-view dataset with a multi-view random forest ============================================================ -An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric -multivariate hypothesis test, on simulated datasets. Here, we present a simulation -of how MIGHT is used to evaluate how a "feature set is important for predicting the target". - -We simulate a dataset with 1000 features, 500 samples, and a binary class target -variable. Within each feature set, there is 500 features associated with one feature -set, and another 500 features associated with another feature set. One could think of -these for example as different datasets collected on the same patient in a biomedical setting. -The first feature set (X) is strongly correlated with the target, and the second -feature set (W) is weakly correlated with the target (y). - -We then use MIGHT to calculate the partial AUC of these sets. 
+An example using :class:`~sktree.MultiViewRandomForestClassifier` for high-dimensional +classification when there are multiple feature sets that are correlated with the +target variable, ``y``. The multi-view random forest is a variant of the random forest +that samples from each feature set uniformly, instead of sampling from all features +uniformly. This is useful when there are multiple feature sets, and some feature sets +have vastly different dimensionality from others. + +In this case, ``X`` is a matrix of shape ``(n_samples, n_features)``, where +``n_features`` is the sum of the number of features in each feature set. If the multi-view +structure is known, then one can pass this to the multi-view random forest via the +``feature_set_ends`` parameter. + +For a visualization of how the multi-view splitter works, see +:ref:`sphx_glr_auto_examples_splitters_plot_multiview_axis_aligned_splitter.py`. """ +from collections import defaultdict + +import matplotlib.pyplot as plt import numpy as np -from sklearn.datasets import make_blobs -from sklearn.model_selection import cross_val_score import pandas as pd import seaborn as sns -import matplotlib.pyplot as plt -from collections import defaultdict +from sklearn.datasets import make_blobs +from sklearn.model_selection import cross_val_score + from sktree import MultiViewRandomForestClassifier, RandomForestClassifier seed = 12345 rng = np.random.default_rng(seed) -def make_multiview_classification(n_samples=100, n_features_1=5, n_features_2=1000, cluster_std=2.0, seed=None): +def make_multiview_classification( + n_samples=100, n_features_1=5, n_features_2=1000, cluster_std=2.0, seed=None +): rng = np.random.default_rng(seed=seed) # Create a high-dimensional multiview dataset with a low-dimensional informative @@ -56,8 +62,11 @@ def make_multiview_classification(n_samples=100, n_features_1=5, n_features_2=10 X = np.vstack((X0, X1)) y = np.hstack((y0, y1)).T + X = X + rng.standard_normal(size=X.shape) + return X, y + # %% # Simulate data # ------------- @@ -66,12 +75,18 @@ def make_multiview_classification(n_samples=100, n_features_1=5, n_features_2=10 # dimensions. The sample-size will be kept fixed, so we can compare the performance of # regular Random forests with Multi-view Random Forests. -n_samples = 1000 -n_features_views = np.linspace(5, 1000, 2).astype(int) +n_samples = 100 +n_features_views = np.linspace(5, 10000, 5).astype(int) datasets = [] for idx, n_features in enumerate(n_features_views): - X, y = make_multiview_classification(n_samples=n_samples, n_features_1=5, n_features_2=n_features, cluster_std=2.0, seed=seed + idx) + X, y = make_multiview_classification( + n_samples=n_samples, + n_features_1=5, + n_features_2=n_features, + cluster_std=2.0, + seed=seed + idx, + ) datasets.append((X, y)) # %% @@ -79,46 +94,54 @@ def make_multiview_classification(n_samples=100, n_features_1=5, n_features_2=10 # ---------------------------------------------- # Here, we fit both forests over all the datasets. 
-n_estimators = 50 +n_estimators = 100 n_jobs = -1 scores = defaultdict(list) -for ((X, y), n_features) in zip(datasets, n_features_views): +for idx, ((X, y), n_features) in enumerate(zip(datasets, n_features_views)): feature_set_ends = np.array([5, n_features + 5]) rf = RandomForestClassifier( n_estimators=n_estimators, n_jobs=n_jobs, + random_state=seed, ) mvrf = MultiViewRandomForestClassifier( n_estimators=n_estimators, n_jobs=n_jobs, feature_set_ends=n_features_views, + random_state=seed, ) # obtain the cross-validation score rf_score = cross_val_score(rf, X, y, cv=2).mean() mvrf_score = cross_val_score(mvrf, X, y, cv=2).mean() - scores['rf'].append(rf_score) - scores['mvrf'].append(mvrf_score) + scores["rf"].append(rf_score) + scores["mvrf"].append(mvrf_score) # %% # Visualize scores and compare performance # ---------------------------------------- # Now, we can compare the performance from the cross-validation experiment. -scores['n_features'] = n_features_views +scores["n_features"] = n_features_views df = pd.DataFrame(scores) # melt the dataframe, to make it easier to plot -df = pd.melt(df, id_vars=['n_features'], var_name='model', value_name='score') +df = pd.melt(df, id_vars=["n_features"], var_name="model", value_name="score") fig, ax = plt.subplots() -sns.lineplot(data=df, x='n_features', y='score', hue='model', label='CV Score', ax=ax) -ax.set_ylabel('CV Score') -ax.set_xlabel('Number of features in second view') -ax.set_title('Random Forest vs Multi-view Random Forest') +sns.lineplot(data=df, x="n_features", y="score", marker="o", hue="model", ax=ax) +ax.set_ylabel("CV Score") +ax.set_xlabel("Number of features in second view") +ax.set_title("Random Forest vs Multi-view Random Forest") plt.show() + +# %% +# As we can see, the multi-view random forest outperforms the regular random forest +# as the number of features in the second view increases. This is because the multi-view +# random forest samples from each feature-set uniformly, while the regular random forest +# samples from all features uniformly. This is a key difference between the two forests. diff --git a/examples/splitters/plot_multiview_axis_aligned_splitter.py b/examples/splitters/plot_multiview_axis_aligned_splitter.py index 1939d4599..7ceb200da 100644 --- a/examples/splitters/plot_multiview_axis_aligned_splitter.py +++ b/examples/splitters/plot_multiview_axis_aligned_splitter.py @@ -124,4 +124,4 @@ # This is the key difference between the two splitters. # # For an example of using the multi-view splitter in practice on simulated data, see -# :ref:`sphx_glr_auto_examples_plot_multiview_dtc.py`. +# :ref:`sphx_glr_auto_examples_multiview_plot_multiview_dtc.py`. 
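[Editor's note, not part of the patch] For readers following the example above: ``feature_set_ends`` encodes the multi-view structure as the cumulative column index at which each view ends, which is what the per-dataset array built inside the loop represents. A minimal sketch with illustrative values:

import numpy as np

n_features_1 = 5      # width of the first (informative) view
n_features_2 = 1000   # width of the second (high-dimensional) view

# columns [0, 5) belong to view 1, columns [5, 5 + n_features_2) to view 2
feature_set_ends = np.array([n_features_1, n_features_1 + n_features_2])

# this per-dataset array is the value to pass as
# MultiViewRandomForestClassifier(..., feature_set_ends=feature_set_ends)
# so that split candidates are sampled uniformly within each view.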
From c206463490b1f7c140379180c1b1526caac72dd7 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 12 Oct 2023 15:53:34 -0400 Subject: [PATCH 6/9] Organize examples Signed-off-by: Adam Li --- examples/calibration/README.txt | 6 + .../plot_overlapping_gaussians.py | 0 ...gigantic_hypothesis_testing_forest copy.py | 138 ------------------ examples/plot_multiview_dtc.py | 127 ---------------- examples/sklearn_vs_sktree/README.txt | 6 + .../{ => sklearn_vs_sktree}/plot_iris_dtc.py | 0 examples/sparse_oblique_trees/README.txt | 6 + .../plot_extra_oblique_random_forest.py | 0 .../plot_extra_orf_sample_size.py | 0 ...ique_axis_aligned_forests_sparse_parity.py | 0 .../plot_oblique_forests_iris.py | 0 .../plot_oblique_random_forest.py | 0 12 files changed, 18 insertions(+), 265 deletions(-) create mode 100644 examples/calibration/README.txt rename examples/{ => calibration}/plot_overlapping_gaussians.py (100%) delete mode 100644 examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest copy.py delete mode 100644 examples/plot_multiview_dtc.py create mode 100644 examples/sklearn_vs_sktree/README.txt rename examples/{ => sklearn_vs_sktree}/plot_iris_dtc.py (100%) create mode 100644 examples/sparse_oblique_trees/README.txt rename examples/{ => sparse_oblique_trees}/plot_extra_oblique_random_forest.py (100%) rename examples/{ => sparse_oblique_trees}/plot_extra_orf_sample_size.py (100%) rename examples/{ => sparse_oblique_trees}/plot_oblique_axis_aligned_forests_sparse_parity.py (100%) rename examples/{ => sparse_oblique_trees}/plot_oblique_forests_iris.py (100%) rename examples/{ => sparse_oblique_trees}/plot_oblique_random_forest.py (100%) diff --git a/examples/calibration/README.txt b/examples/calibration/README.txt new file mode 100644 index 000000000..84d5f5758 --- /dev/null +++ b/examples/calibration/README.txt @@ -0,0 +1,6 @@ +.. _calibration_examples: + +Calibrated decision trees via honesty +------------------------------------- + +Examples demonstrating the usage of honest decision trees to obtain calibrated predictions. diff --git a/examples/plot_overlapping_gaussians.py b/examples/calibration/plot_overlapping_gaussians.py similarity index 100% rename from examples/plot_overlapping_gaussians.py rename to examples/calibration/plot_overlapping_gaussians.py diff --git a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest copy.py b/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest copy.py deleted file mode 100644 index 423bc63dc..000000000 --- a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest copy.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -=========================================================== -Mutual Information for Gigantic Hypothesis Testing (MIGHT) -=========================================================== - -An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric -multivariate hypothesis test, on simulated datasets. Here, we present a simulation -of how MIGHT is used to test the hypothesis that a "feature set is important for -predicting the target". This is a generalization of the framework presented in -:footcite:`coleman2022scalable`. - -We simulate a dataset with 1000 features, 500 samples, and a binary class target -variable. Within each feature set, there is 500 features associated with one feature -set, and another 500 features associated with another feature set. 
One could think of -these for example as different datasets collected on the same patient in a biomedical setting. -The first feature set (X) is strongly correlated with the target, and the second -feature set (W) is weakly correlated with the target (y). Here, we are testing the -null hypothesis: - -- ``H0: I(X; y) - I(X, W; y) = 0`` -- ``HA: I(X; y) - I(X, W; y) < 0`` indicating that there is more mutual information with - respect to ``y`` - -where ``I`` is mutual information. For example, this could be true in the following settings, -where X is our informative feature set and W is our uninformative feature set. - -- ``W X -> y``: here ``W`` is completely disconnected from X and y. -- ``W -> X -> y``: here ``W`` is d-separated from y given X. -- ``W <- X -> y``: here ``W`` is d-separated from y given X. - -We then use MIGHT to test the hypothesis that the first feature set is important for -predicting the target, and the second feature set is not important for predicting the -target. We use :class:`~sktree.stats.FeatureImportanceForestClassifier`. -""" - -import numpy as np -from scipy.special import expit - -from sktree import HonestForestClassifier -from sktree.stats import FeatureImportanceForestClassifier -from sktree.tree import DecisionTreeClassifier - -seed = 12345 -rng = np.random.default_rng(seed) - -# %% -# Simulate data -# ------------- -# We simulate the two feature sets, and the target variable. We then combine them -# into a single dataset to perform hypothesis testing. - -n_samples = 1000 -n_features_set = 500 -mean = 1.0 -sigma = 2.0 -beta = 5.0 - -unimportant_mean = 0.0 -unimportant_sigma = 4.5 - -# first sample the informative features, and then the uniformative features -X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10)) -X_important = np.hstack( - [ - X_important, - rng.normal( - loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10) - ), - ] -) - -X_unimportant = rng.normal( - loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set) -) -X = np.hstack([X_important, X_unimportant]) - -# simulate the binary target variable -y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples) - -# %% -# Perform hypothesis testing using Mutual Information -# --------------------------------------------------- -# Here, we use :class:`~sktree.stats.FeatureImportanceForestClassifier` to perform the hypothesis -# test. The test statistic is computed by comparing the metric (i.e. mutual information) estimated -# between two forests. One forest is trained on the original dataset, and one forest is trained -# on a permuted dataset, where the rows of the ``covariate_index`` columns are shuffled randomly. -# -# The null distribution is then estimated in an efficient manner using the framework of -# :footcite:`coleman2022scalable`. The sample evaluations of each forest (i.e. the posteriors) -# are sampled randomly ``n_repeats`` times to generate a null distribution. The pvalue is then -# computed as the proportion of samples in the null distribution that are less than the -# observed test statistic. 
- -n_estimators = 200 -max_features = "sqrt" -test_size = 0.2 -n_repeats = 1000 -n_jobs = -1 - -est = FeatureImportanceForestClassifier( - estimator=HonestForestClassifier( - n_estimators=n_estimators, - max_features=max_features, - tree_estimator=DecisionTreeClassifier(), - random_state=seed, - honest_fraction=0.7, - n_jobs=n_jobs, - ), - random_state=seed, - test_size=test_size, - permute_per_tree=True, - sample_dataset_per_tree=False, -) - -print( - f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: " - f"{est.sample_dataset_per_tree}" -) -# we test for the first feature set, which is important and thus should return a pvalue < 0.05 -stat, pvalue = est.test( - X, y, covariate_index=np.arange(n_features_set, dtype=int), metric="mi", n_repeats=n_repeats -) -print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") - -# we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05 -stat, pvalue = est.test( - X, - y, - covariate_index=np.arange(n_features_set, dtype=int) + n_features_set, - metric="mi", - n_repeats=n_repeats, -) -print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") - -# %% -# References -# ---------- -# .. footbibliography:: diff --git a/examples/plot_multiview_dtc.py b/examples/plot_multiview_dtc.py deleted file mode 100644 index bd67940db..000000000 --- a/examples/plot_multiview_dtc.py +++ /dev/null @@ -1,127 +0,0 @@ -""" -============================================================ -Analyze a multi-view dataset with a multi-view random forest -============================================================ - -An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric -multivariate hypothesis test, on simulated datasets. Here, we present a simulation -of how MIGHT is used to evaluate how a "feature set is important for predicting the target". - -We simulate a dataset with 1000 features, 500 samples, and a binary class target -variable. Within each feature set, there is 500 features associated with one feature -set, and another 500 features associated with another feature set. One could think of -these for example as different datasets collected on the same patient in a biomedical setting. -The first feature set (X) is strongly correlated with the target, and the second -feature set (W) is weakly correlated with the target (y). - -We then use MIGHT to calculate the partial AUC of these sets. -""" - -import numpy as np -from scipy.special import expit - -from sktree import HonestForestClassifier -from sktree.stats import FeatureImportanceForestClassifier -from sktree.tree import DecisionTreeClassifier - -seed = 12345 -rng = np.random.default_rng(seed) - -# %% -# Simulate data -# ------------- -# We simulate the two feature sets, and the target variable. We then combine them -# into a single dataset to perform hypothesis testing. 
- -n_samples = 1000 -n_features_set = 500 -mean = 1.0 -sigma = 2.0 -beta = 5.0 - -unimportant_mean = 0.0 -unimportant_sigma = 4.5 - -# first sample the informative features, and then the uniformative features -X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10)) -X_important = np.hstack( - [ - X_important, - rng.normal( - loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10) - ), - ] -) - -X_unimportant = rng.normal( - loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set) -) - -# simulate the binary target variable -y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples) - -# %% -# Use partial AUC as test statistic -# --------------------------------- -# You can specify the maximum specificity by modifying ``max_fpr`` in ``statistic``. - -n_estimators = 125 -max_features = "sqrt" -metric = "auc" -test_size = 0.2 -n_jobs = -1 -honest_fraction = 0.7 -max_fpr = 0.1 - -est = FeatureImportanceForestClassifier( - estimator=HonestForestClassifier( - n_estimators=n_estimators, - max_features=max_features, - tree_estimator=DecisionTreeClassifier(), - random_state=seed, - honest_fraction=honest_fraction, - n_jobs=n_jobs, - ), - random_state=seed, - test_size=test_size, - permute_per_tree=True, - sample_dataset_per_tree=True, -) - -# we test for the first feature set, which is important and thus should return a higher AUC -stat, posterior_arr, samples = est.statistic( - X_important, - y, - metric=metric, - return_posteriors=True, -) - -print(f"ASH-90 / Partial AUC: {stat}") -print(f"Shape of Observed Samples: {samples.shape}") -print(f"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}") - -# %% -# Repeat for the second feature set -# --------------------------------- -# This feature set has a smaller statistic, which is expected due to its weak correlation. - -stat, posterior_arr, samples = est.statistic( - X_unimportant, - y, - metric=metric, - return_posteriors=True, -) - -print(f"ASH-90 / Partial AUC: {stat}") -print(f"Shape of Observed Samples: {samples.shape}") -print(f"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}") - -# %% -# All posteriors are saved within the model -# ----------------------------------------- -# Extract the results from the model variables anytime. You can save the model with ``pickle``. -# -# ASH-90 / Partial AUC: ``est.observe_stat_`` -# Observed Samples: ``est.observe_samples_`` -# Tree Posteriors for the positive class: ``est.observe_posteriors_`` (n_trees, n_samples_test, 1) -# True Labels: ``est.y_true_final_`` diff --git a/examples/sklearn_vs_sktree/README.txt b/examples/sklearn_vs_sktree/README.txt new file mode 100644 index 000000000..d942d71f8 --- /dev/null +++ b/examples/sklearn_vs_sktree/README.txt @@ -0,0 +1,6 @@ +.. _sklearn_examples: + +Comparing sklearn and sktree decision trees +------------------------------------------- + +Examples demonstrating the difference between sklearn and sktree decision trees. diff --git a/examples/plot_iris_dtc.py b/examples/sklearn_vs_sktree/plot_iris_dtc.py similarity index 100% rename from examples/plot_iris_dtc.py rename to examples/sklearn_vs_sktree/plot_iris_dtc.py diff --git a/examples/sparse_oblique_trees/README.txt b/examples/sparse_oblique_trees/README.txt new file mode 100644 index 000000000..61c596af1 --- /dev/null +++ b/examples/sparse_oblique_trees/README.txt @@ -0,0 +1,6 @@ +.. 
_sporf_examples: + +Sparse oblique projections with oblique decision-trees +------------------------------------------------------ + +Examples demonstrating learning using oblique random forests. diff --git a/examples/plot_extra_oblique_random_forest.py b/examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py similarity index 100% rename from examples/plot_extra_oblique_random_forest.py rename to examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py diff --git a/examples/plot_extra_orf_sample_size.py b/examples/sparse_oblique_trees/plot_extra_orf_sample_size.py similarity index 100% rename from examples/plot_extra_orf_sample_size.py rename to examples/sparse_oblique_trees/plot_extra_orf_sample_size.py diff --git a/examples/plot_oblique_axis_aligned_forests_sparse_parity.py b/examples/sparse_oblique_trees/plot_oblique_axis_aligned_forests_sparse_parity.py similarity index 100% rename from examples/plot_oblique_axis_aligned_forests_sparse_parity.py rename to examples/sparse_oblique_trees/plot_oblique_axis_aligned_forests_sparse_parity.py diff --git a/examples/plot_oblique_forests_iris.py b/examples/sparse_oblique_trees/plot_oblique_forests_iris.py similarity index 100% rename from examples/plot_oblique_forests_iris.py rename to examples/sparse_oblique_trees/plot_oblique_forests_iris.py diff --git a/examples/plot_oblique_random_forest.py b/examples/sparse_oblique_trees/plot_oblique_random_forest.py similarity index 100% rename from examples/plot_oblique_random_forest.py rename to examples/sparse_oblique_trees/plot_oblique_random_forest.py From c34f20649c3de46dbb930b920dbfbe28525253bc Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 12 Oct 2023 16:11:49 -0400 Subject: [PATCH 7/9] Fix docs Signed-off-by: Adam Li --- .gitignore | 1 + doc/modules/ensemble.rst | 4 ++-- .../sparse_oblique_trees/plot_extra_oblique_random_forest.py | 2 +- examples/sparse_oblique_trees/plot_oblique_random_forest.py | 2 +- examples/splitters/plot_sparse_projection_matrix.py | 4 ++-- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 39a3a651a..db44ed79b 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ doc/coverages doc/samples cover examples/*.jpg +examples/**/*.jpg env/ html/ diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 1f76b888f..38685efad 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -22,8 +22,8 @@ more information and intuition, see .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_plot_oblique_random_forest.py` - * :ref:`sphx_glr_auto_examples_plot_oblique_axis_aligned_forests_sparse_parity.py` + * :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_oblique_random_forest.py` + * :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_oblique_axis_aligned_forests_sparse_parity.py` .. topic:: References diff --git a/examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py b/examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py index 586edd833..fd920cc16 100644 --- a/examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py +++ b/examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py @@ -59,7 +59,7 @@ range, hence the complexity is `O(n)`. This makes the algorithm more suitable for large datasets. 
To see how sample-sizes affect the performance of Extra Oblique Trees vs regular Oblique Trees, -see :ref:`sphx_glr_auto_examples_plot_extra_orf_sample_size.py` +see :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_extra_orf_sample_size.py` References ---------- diff --git a/examples/sparse_oblique_trees/plot_oblique_random_forest.py b/examples/sparse_oblique_trees/plot_oblique_random_forest.py index 34cb8c68a..dca3f19da 100644 --- a/examples/sparse_oblique_trees/plot_oblique_random_forest.py +++ b/examples/sparse_oblique_trees/plot_oblique_random_forest.py @@ -18,7 +18,7 @@ are subsampled due to computational constraints. For an example of using extra-oblique trees/forests in practice on data, see the following -example :ref:`sphx_glr_auto_examples_plot_extra_oblique_random_forest.py`. +example :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_extra_oblique_random_forest.py`. """ from datetime import datetime diff --git a/examples/splitters/plot_sparse_projection_matrix.py b/examples/splitters/plot_sparse_projection_matrix.py index 6eb53e0ca..c77c128a0 100644 --- a/examples/splitters/plot_sparse_projection_matrix.py +++ b/examples/splitters/plot_sparse_projection_matrix.py @@ -125,5 +125,5 @@ # For an example of using oblique trees/forests in practice on data, see the following # examples: # -# - :ref:`sphx_glr_auto_examples_plot_oblique_forests_iris.py` -# - :ref:`sphx_glr_auto_examples_plot_oblique_random_forest.py` +# - :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_oblique_forests_iris.py` +# - :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_oblique_random_forest.py` From 92a79e8013fe5280b162a493e5dced7bbed69c7a Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 12 Oct 2023 16:11:58 -0400 Subject: [PATCH 8/9] Fix docs Signed-off-by: Adam Li --- doc/modules/supervised_tree.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/supervised_tree.rst b/doc/modules/supervised_tree.rst index 91824e04c..1aa0a602a 100644 --- a/doc/modules/supervised_tree.rst +++ b/doc/modules/supervised_tree.rst @@ -41,7 +41,7 @@ as follows: >>> clf = tree.ObliqueDecisionTreeClassifier() >>> clf = clf.fit(X, y) -.. figure:: ../auto_examples/images/sphx_glr_plot_iris_dtc_002.png +.. figure:: ../auto_examples/sklearn_vs_sktree/images/sphx_glr_plot_iris_dtc_002.png :target: ../auto_examples/plot_iris_dtc.html :align: center From 993c30fee8367411592baaa930099b03b422d06b Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 13 Oct 2023 10:07:24 -0400 Subject: [PATCH 9/9] Adding imbalanced MIGHT example Signed-off-by: Adam Li --- .../plot_MI_imbalanced_hyppo_testing.py | 221 +++++++++++++----- 1 file changed, 160 insertions(+), 61 deletions(-) diff --git a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py index 14c260725..882f80c3d 100644 --- a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py +++ b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py @@ -3,42 +3,34 @@ Mutual Information for Gigantic Hypothesis Testing (MIGHT) with Imbalanced Data =============================================================================== -An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric -multivariate hypothesis test, on simulated datasets. Here, we present a simulation -of how MIGHT is used to test the hypothesis that a "feature set is important for -predicting the target". 
This is a generalization of the framework presented in -:footcite:`coleman2022scalable`. - -We simulate a dataset with 1000 features, 500 samples, and a binary class target -variable. Within each feature set, there is 500 features associated with one feature -set, and another 500 features associated with another feature set. One could think of -these for example as different datasets collected on the same patient in a biomedical setting. -The first feature set (X) is strongly correlated with the target, and the second -feature set (W) is weakly correlated with the target (y). Here, we are testing the -null hypothesis: - -- ``H0: I(X; y) - I(X, W; y) = 0`` -- ``HA: I(X; y) - I(X, W; y) < 0`` indicating that there is more mutual information with - respect to ``y`` - -where ``I`` is mutual information. For example, this could be true in the following settings, -where X is our informative feature set and W is our uninformative feature set. - -- ``W X -> y``: here ``W`` is completely disconnected from X and y. -- ``W -> X -> y``: here ``W`` is d-separated from y given X. -- ``W <- X -> y``: here ``W`` is d-separated from y given X. - -We then use MIGHT to test the hypothesis that the first feature set is important for -predicting the target, and the second feature set is not important for predicting the -target. We use :class:`~sktree.stats.FeatureImportanceForestClassifier`. +Here, we demonstrate how to do hypothesis testing on highly imbalanced data +in terms of their feature-set dimensionalities. +using mutual information as a test statistic. We use the framework of +:footcite:`coleman2022scalable` to estimate pvalues efficiently. + +Here, we simulate two feature sets, one of which is important for the target, +but significantly smaller in dimensionality than the other feature set, which +is unimportant for the target. We then use the MIGHT framework to test for +the importance of each feature set. Instead of leveraging a normal honest random +forest to estimate the posteriors, here we leverage a multi-view honest random +forest, with knowledge of the multi-view structure of the ``X`` data. + +For other examples of hypothesis testing, see the following: + +- :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_MI_gigantic_hypothesis_testing_forest.py` +- :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_might_auc.py` + +For more information on the multi-view decision-tree, see +:ref:`sphx_glr_auto_examples_multiview_plot_multiview_dtc.py`. """ +import matplotlib.pyplot as plt import numpy as np -from scipy.special import expit +from sklearn.datasets import make_blobs from sktree import HonestForestClassifier from sktree.stats import FeatureImportanceForestClassifier -from sktree.tree import DecisionTreeClassifier +from sktree.tree import DecisionTreeClassifier, MultiViewDecisionTreeClassifier seed = 12345 rng = np.random.default_rng(seed) @@ -48,35 +40,67 @@ # ------------- # We simulate the two feature sets, and the target variable. We then combine them # into a single dataset to perform hypothesis testing. 
+seed = 12345 +rng = np.random.default_rng(seed) -n_samples = 1000 -n_features_set = 500 -mean = 1.0 -sigma = 2.0 -beta = 5.0 - -unimportant_mean = 0.0 -unimportant_sigma = 4.5 - -# first sample the informative features, and then the uniformative features -X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10)) -X_important = np.hstack( - [ - X_important, - rng.normal( - loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10) - ), - ] -) -X_unimportant = rng.normal( - loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set) +def make_multiview_classification( + n_samples=100, n_features_1=10, n_features_2=1000, cluster_std=2.0, seed=None +): + rng = np.random.default_rng(seed=seed) + + # Create a high-dimensional multiview dataset with a low-dimensional informative + # subspace in one view of the dataset. + X0_first, y0 = make_blobs( + n_samples=n_samples, + cluster_std=cluster_std, + n_features=n_features_1 // 2, + random_state=rng.integers(1, 10000), + centers=1, + ) + + X1_first, y1 = make_blobs( + n_samples=n_samples, + cluster_std=cluster_std, + n_features=n_features_1 // 2, + random_state=rng.integers(1, 10000), + centers=1, + ) + + # create the first views for y=0 and y=1 + X0_first = np.concatenate( + (X0_first, rng.standard_normal(size=(n_samples, n_features_1 // 2))), axis=1 + ) + X1_first = np.concatenate( + (X1_first, rng.standard_normal(size=(n_samples, n_features_1 // 2))), axis=1 + ) + y1[:] = 1 + + # add the second view for y=0 and y=1, which is completely noise + X0 = np.concatenate([X0_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1) + X1 = np.concatenate([X1_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1) + + # combine the views and targets + X = np.vstack((X0, X1)) + y = np.hstack((y0, y1)).T + + # add noise to the data + X = X + rng.standard_normal(size=X.shape) + + return X, y + + +n_samples = 100 +n_features = 10000 +n_features_views = [10, n_features] + +X, y = make_multiview_classification( + n_samples=n_samples, + n_features_1=10, + n_features_2=n_features, + cluster_std=2.0, + seed=seed, ) -X = np.hstack([X_important, X_unimportant]) - -# simulate the binary target variable -y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples) - # %% # Perform hypothesis testing using Mutual Information # --------------------------------------------------- @@ -101,37 +125,112 @@ estimator=HonestForestClassifier( n_estimators=n_estimators, max_features=max_features, - tree_estimator=DecisionTreeClassifier(), + tree_estimator=MultiViewDecisionTreeClassifier(feature_set_ends=n_features_views), random_state=seed, - honest_fraction=0.7, + honest_fraction=0.5, n_jobs=n_jobs, ), random_state=seed, test_size=test_size, - permute_per_tree=True, + permute_per_tree=False, sample_dataset_per_tree=False, ) +mv_results = dict() + print( f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: " f"{est.sample_dataset_per_tree}" ) # we test for the first feature set, which is important and thus should return a pvalue < 0.05 stat, pvalue = est.test( - X, y, covariate_index=np.arange(n_features_set, dtype=int), metric="mi", n_repeats=n_repeats + X, y, covariate_index=np.arange(10, dtype=int), metric="mi", n_repeats=n_repeats ) +mv_results["important_feature_stat"] = stat +mv_results["important_feature_pvalue"] = pvalue print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") # we test for the second feature set, which is 
unimportant and thus should return a pvalue > 0.05 stat, pvalue = est.test( X, y, - covariate_index=np.arange(n_features_set, dtype=int) + n_features_set, + covariate_index=np.arange(10, n_features, dtype=int), metric="mi", n_repeats=n_repeats, ) +mv_results["unimportant_feature_stat"] = stat +mv_results["unimportant_feature_pvalue"] = pvalue print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") +# %% +# Let's investigate what happens when we do not use a multi-view decision tree. +# All other parameters are kept the same. + +est = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=n_estimators, + max_features=max_features, + tree_estimator=DecisionTreeClassifier(), + random_state=seed, + honest_fraction=0.5, + n_jobs=n_jobs, + ), + random_state=seed, + test_size=test_size, + permute_per_tree=False, + sample_dataset_per_tree=False, +) + +rf_results = dict() + +# we test for the first feature set, which is important and thus should return a pvalue < 0.05 +stat, pvalue = est.test( + X, y, covariate_index=np.arange(10, dtype=int), metric="mi", n_repeats=n_repeats +) +rf_results["important_feature_stat"] = stat +rf_results["important_feature_pvalue"] = pvalue +print(f"Estimated MI difference using regular decision-trees: {stat} with Pvalue: {pvalue}") + +# we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05 +stat, pvalue = est.test( + X, + y, + covariate_index=np.arange(10, n_features, dtype=int), + metric="mi", + n_repeats=n_repeats, +) +rf_results["unimportant_feature_stat"] = stat +rf_results["unimportant_feature_pvalue"] = pvalue +print(f"Estimated MI difference using regular decision-trees: {stat} with Pvalue: {pvalue}") + +fig, ax = plt.subplots(figsize=(5, 3)) + +# plot pvalues +ax.bar(0, rf_results["important_feature_pvalue"], label="Important Feature Set (RF)") +ax.bar(1, rf_results["unimportant_feature_pvalue"], label="Unimportant Feature Set (RF)") +ax.bar(2, mv_results["important_feature_pvalue"], label="Important Feature Set (MV)") +ax.bar(3, mv_results["unimportant_feature_pvalue"], label="Unimportant Feature Set (MV)") +ax.axhline(0.05, color="k", linestyle="--", label="alpha=0.05") +ax.set(ylabel="Log10(PValue)", xlim=[-0.5, 3.5], yscale="log") +ax.legend() + +fig.tight_layout() +plt.show() + +# %% +# Discussion +# ---------- +# We see that the multi-view decision tree is able to detect the important feature set, +# while the regular decision tree is not. This is because the regular decision tree +# is not aware of the multi-view structure of the data, and thus is challenged +# by the imbalanced dimensionality of the feature sets. I.e. it rarely splits on +# the first low-dimensional feature set, and thus is unable to detect its importance. +# +# Note both approaches still fail to reject the null hypothesis (for alpha of 0.05) +# when testing the unimportant feature set. The difference in the two approaches +# show the statistical power of the multi-view decision tree is higher than the +# regular decision tree in this simulation. + # %% # References # ----------