diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 12c35fb54..b2c415c49 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -19,6 +19,17 @@ concurrency: cancel-in-progress: true jobs: + create_artifacts_folder: + name: Create Artifacts Folder + runs-on: ubuntu-latest + outputs: + artifacts_folder: ${{ steps.set_folder.outputs.folder_name }} + + steps: + - name: Set Artifacts Folder + id: set_folder + run: echo "folder_name=artifacts/${{ github.run_id }}" >> $GITHUB_ENV + build_wheels: name: Build wheels on ${{ matrix.os[1] }} - ${{ matrix.os[2] }} with Python ${{ matrix.python[0] }} runs-on: ${{ matrix.os[0] }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a943e3d13..32298fc7f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -249,52 +249,6 @@ jobs: name: sktree-build path: $PWD/build - # release is ran when a release is made on Github - release: - name: Release - runs-on: ubuntu-latest - needs: [build_and_test_slow] - if: startsWith(github.ref, 'refs/tags/') - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v4.6.1 - with: - python-version: 3.9 - architecture: "x64" - - name: Install dependencies - run: | - python -m pip install --progress-bar off --upgrade pip setuptools wheel - python -m pip install --progress-bar off build twine - - name: Prepare environment - run: | - echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV - echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV - - name: Download package distribution files - uses: actions/download-artifact@v3 - with: - name: package - path: dist - # TODO: refactor scripts to generate release notes from `whats_new.rst` file instead - # - name: Generate release notes - # run: | - # python scripts/release_notes.py > ${{ github.workspace }}-RELEASE_NOTES.md - - name: Publish package to PyPI - run: | - twine upload -u ${{ secrets.PYPI_USERNAME }} -p ${{ secrets.PYPI_PASSWORD }} dist/* - - name: Publish GitHub release - uses: softprops/action-gh-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - # body_path: ${{ github.workspace }}-RELEASE_NOTES.md - prerelease: ${{ contains(env.TAG, 'rc') }} - files: | - dist/* - - - # build-windows: # name: Meson build Windows # runs-on: windows-2019 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..c137735f6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,61 @@ +name: "Release to PyPI" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.number }}-${{ github.event.type }} + cancel-in-progress: true + +on: + workflow_run: + workflows: ["build_and_test_slow"] + types: + - completed + workflow_dispatch: + +env: + INSTALLDIR: "build-install" + CCACHE_DIR: "${{ github.workspace }}/.ccache" + +jobs: + # release is ran when a release is made on Github + release: + name: Release + runs-on: ubuntu-latest + needs: [build_and_test_slow] + if: startsWith(github.ref, 'refs/tags/') + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v4.6.1 + with: + python-version: 3.9 + architecture: "x64" + - name: Install dependencies + run: | + python -m pip install --progress-bar off --upgrade pip setuptools wheel + python -m pip install --progress-bar off build twine + - name: Prepare 
environment + run: | + echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV + echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV + - name: Download package distribution files + uses: actions/download-artifact@v3 + with: + name: package + path: dist + # TODO: refactor scripts to generate release notes from `whats_new.rst` file instead + # - name: Generate release notes + # run: | + # python scripts/release_notes.py > ${{ github.workspace }}-RELEASE_NOTES.md + - name: Publish package to PyPI + run: | + twine upload -u ${{ secrets.PYPI_USERNAME }} -p ${{ secrets.PYPI_PASSWORD }} dist/* + - name: Publish GitHub release + uses: softprops/action-gh-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + # body_path: ${{ github.workspace }}-RELEASE_NOTES.md + prerelease: ${{ contains(env.TAG, 'rc') }} + files: | + dist/* diff --git a/DEVELOPING.md b/DEVELOPING.md index a9d64457c..200789d7c 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -76,4 +76,8 @@ Verify that installations work as expected on your machine. twine upload dist/* +or if you have two-factor authentication enabled: https://pypi.org/help/#apitoken + + twine upload dist/* --repository scikit-tree + 4. Update version number on ``meson.build`` and ``_version.py`` to the relevant version. \ No newline at end of file diff --git a/doc/api.rst b/doc/api.rst index 2a74a7a01..59238d77a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -65,6 +65,7 @@ how scikit-learn builds trees. PatchObliqueRandomForestClassifier PatchObliqueRandomForestRegressor HonestForestClassifier + MultiViewRandomForestClassifier .. currentmodule:: sktree.tree .. autosummary:: @@ -75,6 +76,7 @@ how scikit-learn builds trees. PatchObliqueDecisionTreeClassifier PatchObliqueDecisionTreeRegressor HonestTreeClassifier + MultiViewDecisionTreeClassifier Unsupervised ------------ diff --git a/doc/whats_new/v0.3.rst b/doc/whats_new/v0.3.rst index ebae29995..fec97bb01 100644 --- a/doc/whats_new/v0.3.rst +++ b/doc/whats_new/v0.3.rst @@ -14,6 +14,7 @@ Changelog --------- - |Fix| Fixes a bug in consistency of train/test samples when ``random_state`` is not set in FeatureImportanceForestClassifier and FeatureImportanceForestRegressor, by `Adam Li`_ (:pr:`135`) - |Fix| Fixes a bug where covariate indices were not shuffled by default when running FeatureImportanceForestClassifier and FeatureImportanceForestRegressor test methods, by `Sambit Panda`_ (:pr:`140`) +- |Enhancement| Add multi-view splitter for axis-aligned decision trees, by `Adam Li`_ (:pr:`129`) Code and Documentation Contributors ----------------------------------- diff --git a/examples/README.txt b/examples/README.txt index f9f595033..e13d3e890 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -1,4 +1,4 @@ Examples --------- +======== Examples demonstrating how to use scikit-tree algorithms. diff --git a/examples/hypothesis_testing/README.txt b/examples/hypothesis_testing/README.txt new file mode 100644 index 000000000..9a60917ca --- /dev/null +++ b/examples/hypothesis_testing/README.txt @@ -0,0 +1,6 @@ +.. _hyppo_examples: + +Hypothesis testing with decision trees +-------------------------------------- + +Examples demonstrating how to use decision-trees for statistical hypothesis testing. 
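The multi-view estimators added to ``doc/api.rst`` above follow the usual scikit-learn
fit/predict API. A minimal usage sketch (the random data and the two-view boundary at
column 10 are illustrative, not taken from this changeset):

    import numpy as np

    from sktree import MultiViewRandomForestClassifier

    rng = np.random.default_rng(0)
    # Two concatenated feature sets: columns 0-9 form the first view,
    # columns 10-29 form the second view.
    X = rng.standard_normal((100, 30))
    y = rng.integers(0, 2, size=100)

    clf = MultiViewRandomForestClassifier(
        n_estimators=10,
        feature_set_ends=[10, 30],  # end index of each feature set
        random_state=0,
    )
    clf.fit(X, y)
    print(clf.predict(X[:5]))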
diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py similarity index 100% rename from examples/plot_MI_gigantic_hypothesis_testing_forest.py rename to examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py diff --git a/examples/plot_might_auc.py b/examples/hypothesis_testing/plot_might_auc.py similarity index 100% rename from examples/plot_might_auc.py rename to examples/hypothesis_testing/plot_might_auc.py diff --git a/examples/outlier_detection/README.txt b/examples/outlier_detection/README.txt new file mode 100644 index 000000000..f7f13ea8e --- /dev/null +++ b/examples/outlier_detection/README.txt @@ -0,0 +1,6 @@ +.. _outlier_examples: + +Outlier-detection +----------------- + +Examples concerning how to do outlier detection with decision trees. diff --git a/examples/plot_extended_isolation_forest.py b/examples/outlier_detection/plot_extended_isolation_forest.py similarity index 100% rename from examples/plot_extended_isolation_forest.py rename to examples/outlier_detection/plot_extended_isolation_forest.py diff --git a/examples/plot_extra_oblique_random_forest.py b/examples/plot_extra_oblique_random_forest.py index 497d1d98c..586edd833 100644 --- a/examples/plot_extra_oblique_random_forest.py +++ b/examples/plot_extra_oblique_random_forest.py @@ -58,6 +58,9 @@ are not sorted and the split is determined by randomly drawing a threshold from the feature's range, hence the complexity is `O(n)`. This makes the algorithm more suitable for large datasets. +To see how sample-sizes affect the performance of Extra Oblique Trees vs regular Oblique Trees, +see :ref:`sphx_glr_auto_examples_plot_extra_orf_sample_size.py` + References ---------- .. [1] P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized trees", Machine Learning, 63(1), @@ -149,6 +152,7 @@ def get_scores(X, y, d_name, n_cv=5, n_repeats=1, **kwargs): "random_state": random_state, "n_cv": 10, "n_repeats": 1, + "n_jobs": -1, } for data_id in data_ids: diff --git a/examples/plot_extra_orf_sample_size.py b/examples/plot_extra_orf_sample_size.py index 78c29643f..5651d27ec 100644 --- a/examples/plot_extra_orf_sample_size.py +++ b/examples/plot_extra_orf_sample_size.py @@ -122,6 +122,7 @@ def get_scores(X, y, d_name, n_cv=5, n_repeats=1, **kwargs): "random_state": random_state, "n_cv": 10, "n_repeats": 1, + "n_jobs": -1, } for data_id in data_ids: diff --git a/examples/plot_oblique_random_forest.py b/examples/plot_oblique_random_forest.py index 9a7a0acb3..34cb8c68a 100644 --- a/examples/plot_oblique_random_forest.py +++ b/examples/plot_oblique_random_forest.py @@ -16,6 +16,9 @@ will notice, of these three datasets, the oblique forest outperforms axis-aligned random forest on cnae-9 utilizing sparse random projection mechanism. All datasets are subsampled due to computational constraints. + +For an example of using extra-oblique trees/forests in practice on data, see the following +example :ref:`sphx_glr_auto_examples_plot_extra_oblique_random_forest.py`. """ from datetime import datetime diff --git a/examples/splitters/README.txt b/examples/splitters/README.txt new file mode 100644 index 000000000..40ea1e232 --- /dev/null +++ b/examples/splitters/README.txt @@ -0,0 +1,6 @@ +.. _splitter_examples: + +Decision-tree splitters +----------------------- + +Examples demonstrating different node-splitting strategies for decision trees. 
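The complexity claim in ``examples/plot_extra_oblique_random_forest.py`` above (random
thresholds avoid the O(n log n) sort required by an exhaustive best-split search) can be
illustrated with a small NumPy sketch; this is a simplification of the idea, not the
splitter implementation used in the library:

    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.standard_normal(1000)  # values of one candidate feature at a node

    # Exhaustive best-split search: sort the feature values and scan every midpoint
    # between consecutive values -- O(n log n) per feature.
    sorted_x = np.sort(x)
    candidate_thresholds = (sorted_x[:-1] + sorted_x[1:]) / 2

    # Extra-trees-style split: draw a single threshold uniformly from the feature's
    # observed range -- O(n) per feature, since only the min and max are needed.
    random_threshold = rng.uniform(x.min(), x.max())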
diff --git a/examples/splitters/plot_multiview_axis_aligned_splitter.py b/examples/splitters/plot_multiview_axis_aligned_splitter.py new file mode 100644 index 000000000..8fc5b32c2 --- /dev/null +++ b/examples/splitters/plot_multiview_axis_aligned_splitter.py @@ -0,0 +1,124 @@ +""" +================================================================================= +Demonstrate and visualize a multi-view projection matrix for an axis-aligned tree +================================================================================= + +This example shows how multi-view projection matrices are generated for a decision tree, +specifically the :class:`sktree.tree.MultiViewDecisionTreeClassifier`. + +Multi-view projection matrices operate under the assumption that the input ``X`` array +consists of multiple feature-sets that are groups of features important for predicting +``y``. + +For details on how to use the hyperparameters related to the multi-view, see +:class:`sktree.tree.MultiViewDecisionTreeClassifier`. +""" + +# import modules +# .. note:: We use a private Cython module here to demonstrate what the patches +# look like. This is not part of the public API. The Cython module used +# is just a Python wrapper for the underlying Cython code and is not the +# same as the Cython splitter used in the actual implementation. +# To use the actual splitter, one should use the public API for the +# relevant tree/forests class. + +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.cm import ScalarMappable +from matplotlib.colors import ListedColormap + +from sktree._lib.sklearn.tree._criterion import Gini +from sktree.tree._oblique_splitter import MultiViewSplitterTester + +criterion = Gini(1, np.array((0, 1))) +max_features = 5 +min_samples_leaf = 1 +min_weight_leaf = 0.0 +random_state = np.random.RandomState(10) + +# we "simulate" three feature sets, with 3, 2 and 4 features respectively +feature_set_ends = np.array([3, 5, 9], dtype=np.intp) +n_feature_sets = len(feature_set_ends) + +feature_combinations = 1 +monotonic_cst = None +missing_value_feature_mask = None + +# initialize some dummy data +X = np.repeat(np.arange(feature_set_ends[-1]).astype(np.float32), 5).reshape(5, -1) +y = np.array([0, 0, 0, 1, 1]).reshape(-1, 1).astype(np.float64) +sample_weight = np.ones(5) + +print("The shape of our dataset is: ", X.shape, y.shape, sample_weight.shape) + +# %% +# Initialize the multi-view splitter +# ---------------------------------- +# The multi-view splitter is a Cython class that is initialized internally +# in scikit-tree. However, we expose a Python tester object to demonstrate +# how the splitter works in practice. +# +# .. warning:: Do not use this interface directly in practice. + +splitter = MultiViewSplitterTester( + criterion, + max_features, + min_samples_leaf, + min_weight_leaf, + random_state, + monotonic_cst, + feature_combinations, + feature_set_ends, + n_feature_sets, +) +splitter.init_test(X, y, sample_weight, missing_value_feature_mask) + +# %% +# Sample the projection matrix +# ---------------------------- +# The projection matrix is sampled by the splitter. The projection +# matrix is a (max_features, n_features) matrix that selects which features of ``X`` +# to define candidate split dimensions. The multi-view +# splitter's projection matrix though samples from multiple feature sets, +# which are aligned contiguously over the columns of ``X``. 
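+#
+# Each row of the printed ``(max_features, n_features)`` matrix below corresponds to
+# one candidate split; its non-zero entries mark the features sampled for that split,
+# drawn from the three feature sets rather than from the full block of columns at once.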
+ +projection_matrix = splitter.sample_projection_matrix_py() +print(projection_matrix) + +cmap = ListedColormap(["white", "green"][:n_feature_sets]) + +# Create a heatmap to visualize the indices +fig, ax = plt.subplots(figsize=(6, 6)) + +ax.imshow( + projection_matrix, cmap=cmap, aspect=feature_set_ends[-1] / max_features, interpolation="none" +) +ax.axvline(feature_set_ends[0] - 0.5, color="black", linewidth=1, label="Feature Sets") +for iend in feature_set_ends[1:]: + ax.axvline(iend - 0.5, color="black", linewidth=1) + +ax.set(title="Sampled Projection Matrix", xlabel="Feature Index", ylabel="Projection Vector Index") +ax.set_xticks(np.arange(feature_set_ends[-1])) +ax.set_yticks(np.arange(max_features)) +ax.set_yticklabels(np.arange(max_features, dtype=int) + 1) +ax.set_xticklabels(np.arange(feature_set_ends[-1], dtype=int) + 1) +ax.legend() + +# Create a mappable object +sm = ScalarMappable(cmap=cmap) +sm.set_array([]) # You can set an empty array or values here + +# Create a color bar with labels for each feature set +colorbar = fig.colorbar(sm, ax=ax, ticks=[0.25, 0.75], format="%d") +colorbar.set_label("Projection Weight (I.e. Sampled Feature From a Feature Set)") +colorbar.ax.set_yticklabels(["0", "1"]) + +plt.show() + +# %% +# Discussion +# ---------- +# As we can see, the multi-view splitter samples split candidates uniformly across the feature sets. +# In contrast, the normal splitter in :class:`sklearn.tree.DecisionTreeClassifier` samples +# randomly across all ``n_features`` features because it is not aware of the multi-view structure. +# This is the key difference between the two splitters. diff --git a/examples/plot_projection_matrices.py b/examples/splitters/plot_projection_matrices.py similarity index 88% rename from examples/plot_projection_matrices.py rename to examples/splitters/plot_projection_matrices.py index 7e4b83b38..9e1cbaef1 100644 --- a/examples/plot_projection_matrices.py +++ b/examples/splitters/plot_projection_matrices.py @@ -1,7 +1,7 @@ """ -=============================================== -Plot the projection matrices of an oblique tree -=============================================== +=================================================================================== +Plot the projection matrices of an oblique tree for sampling images, or time-series +=================================================================================== This example shows how projection matrices are generated for an oblique tree, specifically the :class:`sktree.tree.PatchObliqueDecisionTreeClassifier`. @@ -61,6 +61,7 @@ boundary = None feature_weight = None +monotonic_cst = None missing_value_feature_mask = None # initialize some dummy data @@ -76,18 +77,25 @@ # Now that we have th patch splitter initialized, we can generate some patches # and visualize how they appear on the data. We will make the patch 1D, which # samples multiple rows contiguously. This is a 1D patch of size 3. +# +# .. warning:: Do not use this interface directly in practice. 
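+#
+# ``min_patch_dims`` and ``max_patch_dims`` bound the shape of each sampled patch
+# along the two dimensions of ``data_dims`` (here a 5 x 5 grid): (3, 1) allows a
+# patch of up to three contiguous rows within a single column.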
+ min_patch_dims = np.array((1, 1)) max_patch_dims = np.array((3, 1)) dim_contiguous = np.array((True, True)) data_dims = np.array((5, 5)) +# Note: not used, but passed for API compatibility +feature_combinations = 1.5 + splitter = BestPatchSplitterTester( criterion, max_features, min_samples_leaf, min_weight_leaf, random_state, - missing_value_feature_mask, + monotonic_cst, + feature_combinations, min_patch_dims, max_patch_dims, dim_contiguous, @@ -95,10 +103,10 @@ boundary, feature_weight, ) -splitter.init_test(X, y, sample_weight, None) +splitter.init_test(X, y, sample_weight, missing_value_feature_mask) # sample the projection matrix that consists of 1D patches -proj_mat = splitter.sample_projection_matrix() +proj_mat = splitter.sample_projection_matrix_py() print(proj_mat.shape) # Visualize 1D patches @@ -132,7 +140,8 @@ min_samples_leaf, min_weight_leaf, random_state, - missing_value_feature_mask, + monotonic_cst, + feature_combinations, min_patch_dims, max_patch_dims, dim_contiguous, @@ -140,10 +149,10 @@ boundary, feature_weight, ) -splitter.init_test(X, y, sample_weight) +splitter.init_test(X, y, sample_weight, missing_value_feature_mask) # sample the projection matrix that consists of 1D patches -proj_mat = splitter.sample_projection_matrix() +proj_mat = splitter.sample_projection_matrix_py() # Visualize 2D patches fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12, 8), sharex=True, sharey=True, squeeze=True) @@ -181,7 +190,8 @@ min_samples_leaf, min_weight_leaf, random_state, - missing_value_feature_mask, + monotonic_cst, + feature_combinations, min_patch_dims, max_patch_dims, dim_contiguous, @@ -189,10 +199,10 @@ boundary, feature_weight, ) -splitter.init_test(X, y, sample_weight) +splitter.init_test(X, y, sample_weight, missing_value_feature_mask) # sample the projection matrix that consists of 1D patches -proj_mat = splitter.sample_projection_matrix() +proj_mat = splitter.sample_projection_matrix_py() print(proj_mat.shape) fig = plt.figure() @@ -244,7 +254,8 @@ min_samples_leaf, min_weight_leaf, random_state, - missing_value_feature_mask, + monotonic_cst, + feature_combinations, min_patch_dims, max_patch_dims, dim_contiguous, @@ -252,10 +263,10 @@ boundary, feature_weight, ) -splitter.init_test(X, y, sample_weight) +splitter.init_test(X, y, sample_weight, missing_value_feature_mask) # sample the projection matrix that consists of 1D patches -proj_mat = splitter.sample_projection_matrix() +proj_mat = splitter.sample_projection_matrix_py() # Visualize 2D patches fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(12, 8), sharex=True, sharey=True, squeeze=True) @@ -282,7 +293,8 @@ min_samples_leaf, min_weight_leaf, random_state, - missing_value_feature_mask, + monotonic_cst, + feature_combinations, min_patch_dims, max_patch_dims, dim_contiguous, @@ -290,10 +302,10 @@ boundary, feature_weight, ) -splitter.init_test(X, y, sample_weight) +splitter.init_test(X, y, sample_weight, missing_value_feature_mask) # sample the projection matrix that consists of 1D patches -proj_mat = splitter.sample_projection_matrix() +proj_mat = splitter.sample_projection_matrix_py() # Visualize 2D patches fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(12, 8), sharex=True, sharey=True, squeeze=True) diff --git a/examples/splitters/plot_sparse_projection_matrix.py b/examples/splitters/plot_sparse_projection_matrix.py new file mode 100644 index 000000000..6eb53e0ca --- /dev/null +++ b/examples/splitters/plot_sparse_projection_matrix.py @@ -0,0 +1,129 @@ +""" 
+====================================================== +Plot the sparse projection matrices of an oblique tree +====================================================== + +This example shows how projection matrices are generated for an oblique tree, +specifically the :class:`sktree.tree.ObliqueDecisionTreeClassifier`. + +The projection matrix here samples a subset of features from the input ``X`` +controlled by the parameter ``feature_combinations``. The projection matrix +is sparse when ``feature_combinations`` is small, meaning that it is mostly zero. +The non-zero elements of the projection matrix are the features that are sampled +from the input ``X`` and linearly combined to form candidate split dimensions. + +For details on how to use the hyperparameters related to the patches, see +:class:`sktree.tree.ObliqueDecisionTreeClassifier`. +""" +import matplotlib.pyplot as plt + +# import modules +# .. note:: We use a private Cython module here to demonstrate what the patches +# look like. This is not part of the public API. The Cython module used +# is just a Python wrapper for the underlying Cython code and is not the +# same as the Cython splitter used in the actual implementation. +# To use the actual splitter, one should use the public API for the +# relevant tree/forests class. +import numpy as np +from matplotlib.cm import ScalarMappable +from matplotlib.colors import ListedColormap + +from sktree._lib.sklearn.tree._criterion import Gini +from sktree.tree._oblique_splitter import BestObliqueSplitterTester + +# %% +# Initialize patch splitter +# ------------------------- +# The patch splitter is used to generate patches for the projection matrices. +# We will initialize the patch with some dummy values for the sake of this +# example. + +criterion = Gini(1, np.array((0, 1))) +max_features = 6 +min_samples_leaf = 1 +min_weight_leaf = 0.0 +random_state = np.random.RandomState(1) + +feature_combinations = 1.5 +monotonic_cst = None +missing_value_feature_mask = None + +n_features = 10 +n_samples = 5 + +# initialize some dummy data +X = np.ones((n_samples, n_features), dtype=np.float32) +y = np.array([0, 0, 0, 1, 1]).reshape(-1, 1).astype(np.float64) +sample_weight = np.ones(5) + +print("The shape of our dataset is: ", X.shape, y.shape, sample_weight.shape) + +# %% +# Initialize the splitter +# ----------------------- +# The splitter is initialized in the decision-tree classes, but we expose +# a testing interface here to demonstrate how the projection matrices are +# sampled internally. +# +# .. warning:: Do not use this interface directly in practice. + +splitter = BestObliqueSplitterTester( + criterion, + max_features, + min_samples_leaf, + min_weight_leaf, + random_state, + monotonic_cst, + feature_combinations, +) +splitter.init_test(X, y, sample_weight, missing_value_feature_mask) + +# %% +# Generate projection matrix +# -------------------------- +# Sample the projection matrix that consists of randomly sampled features +# with an average of ``feature_combinations * max_features`` non-zeros +# in the ``(max_features, n_features)`` matrix. 
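+#
+# Each row of the matrix defines one candidate split as a sparse linear combination
+# of input features; the non-zero entries (here +1 or -1) are the weights applied to
+# the sampled features.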
+ +projection_matrix = splitter.sample_projection_matrix_py() +print(projection_matrix.shape) + +# Visualize the projection matrix +cmap = ListedColormap(["orange", "white", "green"]) + +# Create a heatmap to visualize the indices +fig, ax = plt.subplots(figsize=(6, 6)) + +ax.imshow(projection_matrix, cmap=cmap, aspect=n_features / max_features, interpolation="none") + +ax.set(title="Sampled Projection Matrix", xlabel="Feature Index", ylabel="Projection Vector Index") +ax.set_xticks(np.arange(n_features)) +ax.set_yticks(np.arange(max_features)) +ax.set_yticklabels(np.arange(max_features, dtype=int) + 1) +ax.set_xticklabels(np.arange(n_features, dtype=int) + 1) + +# Create a mappable object +sm = ScalarMappable(cmap=cmap) +sm.set_array([]) # You can set an empty array or values here + +# Create a color bar with labels for each feature set +colorbar = fig.colorbar(sm, ax=ax, ticks=[0, 0.5, 1], format="%d") +colorbar.set_label("Projection Weight") +colorbar.ax.set_yticklabels(["-1", "0", "1"]) + +plt.show() + +# %% +# Discussion +# ---------- +# As we can see, the (sparse) oblique splitter samples random features to +# linearly combine to form candidate split dimensions. +# +# In contrast, the normal splitter in :class:`sklearn.tree.DecisionTreeClassifier` samples +# randomly across all ``n_features`` features. +# +# For an example of using oblique trees/forests in practice on data, see the following +# examples: +# +# - :ref:`sphx_glr_auto_examples_plot_oblique_forests_iris.py` +# - :ref:`sphx_glr_auto_examples_plot_oblique_random_forest.py` diff --git a/sktree/__init__.py b/sktree/__init__.py index 1aa58557f..1ca249fc0 100644 --- a/sktree/__init__.py +++ b/sktree/__init__.py @@ -44,7 +44,7 @@ ExtraTreesRegressor, ) from .neighbors import NearestNeighborsMetaEstimator - from .ensemble import ExtendedIsolationForest + from .ensemble import ExtendedIsolationForest, MultiViewRandomForestClassifier from .ensemble._unsupervised_forest import ( UnsupervisedRandomForest, UnsupervisedObliqueRandomForest, @@ -85,4 +85,5 @@ "ExtraTreesClassifier", "ExtraTreesRegressor", "ExtendedIsolationForest", + "MultiViewRandomForestClassifier", ] diff --git a/sktree/cv.py b/sktree/cv.py new file mode 100644 index 000000000..cd38e7793 --- /dev/null +++ b/sktree/cv.py @@ -0,0 +1,55 @@ +from typing import Any, List, Tuple + +from numpy.typing import ArrayLike +from sklearn.base import clone +from sklearn.model_selection import BaseCrossValidator, StratifiedKFold + +from sktree._lib.sklearn.ensemble._forest import BaseForest + + +def cross_fit_forest( + forest: BaseForest, + X: ArrayLike, + y: ArrayLike, + cv: BaseCrossValidator = None, + n_splits: int = 5, + random_state=None, +) -> Tuple[List[Any], List[ArrayLike]]: + """Perform cross-fitting of a forest estimator. + + Parameters + ---------- + forest : BaseForest + Forest. + X : ArrayLike of shape (n_samples, n_features) + Input data. + y : ArrayLike of shape (n_samples, [n_outputs]) + Target data. + cv : BaseCrossValidator, optional + Cross validation object, by default None, which defaults to + :class:`sklearn.model_selection.StratifiedKFold`. + n_splits : int, optional + Number of folds to generate, by default 5. + random_state : int, optional + Random seed. + + Returns + ------- + fitted_forests : List[BaseForest] + List of fitted forests. + test_indices : List[ArrayLike] + List of test indices over ``n_samples``. 
+ """ + if cv is None: + cv = StratifiedKFold(n_splits=n_splits, random_state=random_state, shuffle=True) + + fitted_forests = [] + test_indices = [] + for train_index, test_index in cv.split(X, y): + new_forest = clone(forest) + new_forest.fit(X[train_index], y[train_index]) + + fitted_forests.append(new_forest) + test_indices.append(test_index) + + return fitted_forests, test_indices diff --git a/sktree/ensemble/__init__.py b/sktree/ensemble/__init__.py index 1345565c0..aa97d0215 100644 --- a/sktree/ensemble/__init__.py +++ b/sktree/ensemble/__init__.py @@ -1,5 +1,6 @@ from ._eiforest import ExtendedIsolationForest from ._honest_forest import HonestForestClassifier +from ._multiview import MultiViewRandomForestClassifier from ._supervised_forest import ( ExtraObliqueRandomForestClassifier, ExtraObliqueRandomForestRegressor, diff --git a/sktree/ensemble/_multiview.py b/sktree/ensemble/_multiview.py new file mode 100644 index 000000000..a33ec6047 --- /dev/null +++ b/sktree/ensemble/_multiview.py @@ -0,0 +1,308 @@ +from sklearn.utils._param_validation import StrOptions + +from .._lib.sklearn.ensemble._forest import ForestClassifier +from ..tree import MultiViewDecisionTreeClassifier +from ..tree._neighbors import SimMatrixMixin + + +class MultiViewRandomForestClassifier(SimMatrixMixin, ForestClassifier): + """ + A multi-view axis-aligned random forest classifier. + + A multi-view random forest is a meta estimator similar to a random + forest that fits a number of multi-view decision tree classifiers + on various sub-samples of the dataset and uses averaging to + improve the predictive accuracy and control over-fitting. + + Parameters + ---------- + n_estimators : int, default=100 + The number of trees in the forest. + + criterion : {"gini", "entropy"}, default="gini" + The function to measure the quality of a split. Supported criteria are + "gini" for the Gini impurity and "entropy" for the information gain. + Note: this parameter is tree-specific. + + max_depth : int, default=None + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node: + + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. + + min_samples_leaf : int or float, default=1 + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + + min_weight_fraction_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. + + max_features : {"sqrt", "log2", None}, int or float, default="sqrt" + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. 
+ - If float, then `max_features` is a fraction and + `round(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + max_leaf_nodes : int, default=None + Grow trees with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. + + min_impurity_decrease : float, default=0.0 + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. + + The weighted impurity decrease equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + + bootstrap : bool, default=True + Whether bootstrap samples are used when building trees. If False, the + whole dataset is used to build each tree. + + oob_score : bool, default=False + Whether to use out-of-bag samples to estimate the generalization score. + Only available if bootstrap=True. + + n_jobs : int, default=None + The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, + :meth:`decision_path` and :meth:`apply` are all parallelized over the + trees. ``None`` means 1 unless in a `joblib.parallel_backend` + context. ``-1`` means using all processors. See :term:`Glossary + ` for more details. + + random_state : int, RandomState instance or None, default=None + Controls both the randomness of the bootstrapping of the samples used + when building trees (if ``bootstrap=True``) and the sampling of the + features to consider when looking for the best split at each node + (if ``max_features < n_features``). + See :term:`Glossary ` for details. + + verbose : int, default=0 + Controls the verbosity when fitting and predicting. + + warm_start : bool, default=False + When set to ``True``, reuse the solution of the previous call to fit + and add more estimators to the ensemble, otherwise, just fit a whole + new forest. See :term:`the Glossary `. + + class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ + default=None + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. 
+ + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + The "balanced_subsample" mode is the same as "balanced" except that + weights are computed based on the bootstrap sample for every tree + grown. + + For multi-output, the weights of each column of y will be multiplied. + + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + + max_samples : int or float, default=None + If bootstrap is True, the number of samples to draw from X + to train each base estimator. + + - If None (default), then draw `X.shape[0]` samples. + - If int, then draw `max_samples` samples. + - If float, then draw `max_samples * X.shape[0]` samples. Thus, + `max_samples` should be in the interval `(0.0, 1.0]`. + + feature_combinations : float, default=None + The number of features to combine on average at each split + of the decision trees. If ``None``, then will default to the minimum of + ``(1.5, n_features)``. This controls the number of non-zeros is the + projection matrix. Setting the value to 1.0 is equivalent to a + traditional decision-tree. ``feature_combinations * max_features`` + gives the number of expected non-zeros in the projection matrix of shape + ``(max_features, n_features)``. Thus this value must always be less than + ``n_features`` in order to be valid. + + feature_set_ends : array-like of int of shape (n_feature_sets,), default=None + The indices of the end of each feature set. For example, if the first + feature set is the first 10 features, and the second feature set is the + next 20 features, then ``feature_set_ends = [10, 30]``. If ``None``, + then this will assume that there is only one feature set. + + Attributes + ---------- + base_estimator_ : sktree.tree.ObliqueDecisionTreeClassifier + The child estimator template used to create the collection of fitted + sub-estimators. + + estimators_ : list of sktree.tree.ObliqueDecisionTreeClassifier + The collection of fitted sub-estimators. + + classes_ : ndarray of shape (n_classes,) or a list of such arrays + The classes labels (single output problem), or a list of arrays of + class labels (multi-output problem). + + n_classes_ : int or list + The number of classes (single output problem), or a list containing the + number of classes for each output (multi-output problem). + + n_features_ : int + The number of features when ``fit`` is performed. + + n_features_in_ : int + Number of features seen during :term:`fit`. + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + n_outputs_ : int + The number of outputs when ``fit`` is performed. + + feature_importances_ : ndarray of shape (n_features,) + The impurity-based feature importances. + The higher, the more important the feature. + The importance of a feature is computed as the (normalized) + total reduction of the criterion brought by that feature. It is also + known as the Gini importance. + + Warning: impurity-based feature importances can be misleading for + high cardinality features (many unique values). See + :func:`sklearn.inspection.permutation_importance` as an alternative. + + oob_score_ : float + Score of the training dataset obtained using an out-of-bag estimate. + This attribute exists only when ``oob_score`` is True. 
+ + oob_decision_function_ : ndarray of shape (n_samples, n_classes) or \ + (n_samples, n_classes, n_outputs) + Decision function computed with out-of-bag estimate on the training + set. If n_estimators is small it might be possible that a data point + was never left out during the bootstrap. In this case, + `oob_decision_function_` might contain NaN. This attribute exists + only when ``oob_score`` is True. + + See Also + -------- + sktree.tree.ObliqueDecisionTreeClassifier : An oblique decision + tree classifier. + sklearn.ensemble.RandomForestClassifier : An axis-aligned decision + forest classifier. + """ + + tree_type = "oblique" + _parameter_constraints: dict = { + **ForestClassifier._parameter_constraints, + **MultiViewDecisionTreeClassifier._parameter_constraints, + "class_weight": [ + StrOptions({"balanced_subsample", "balanced"}), + dict, + list, + None, + ], + } + _parameter_constraints.pop("splitter") + + def __init__( + self, + n_estimators=100, + *, + criterion="gini", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features="sqrt", + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=True, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + class_weight=None, + max_samples=None, + feature_combinations=None, + feature_set_ends=None, + ): + super().__init__( + estimator=MultiViewDecisionTreeClassifier(), + n_estimators=n_estimators, + estimator_params=( + "criterion", + "max_depth", + "min_samples_split", + "min_samples_leaf", + "min_weight_fraction_leaf", + "max_features", + "max_leaf_nodes", + "min_impurity_decrease", + "random_state", + "feature_combinations", + "feature_set_ends", + ), + bootstrap=bootstrap, + oob_score=oob_score, + n_jobs=n_jobs, + random_state=random_state, + verbose=verbose, + warm_start=warm_start, + class_weight=class_weight, + max_samples=max_samples, + ) + self.criterion = criterion + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf + self.max_features = max_features + self.feature_combinations = feature_combinations + self.feature_set_ends = feature_set_ends + + # unused by oblique forests + self.min_weight_fraction_leaf = min_weight_fraction_leaf + self.max_leaf_nodes = max_leaf_nodes + self.min_impurity_decrease = min_impurity_decrease diff --git a/sktree/ensemble/meson.build b/sktree/ensemble/meson.build index a27293dd9..a8df74f57 100644 --- a/sktree/ensemble/meson.build +++ b/sktree/ensemble/meson.build @@ -3,7 +3,8 @@ python_sources = [ '_supervised_forest.py', '_unsupervised_forest.py', '_honest_forest.py', - '_eiforest.py' + '_eiforest.py', + '_multiview.py', ] py3.install_sources( diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index aa9e81e9b..4d6dc7b77 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -332,9 +332,6 @@ def statistic( if self._type_of_target_ is None: self._type_of_target_ = type_of_target(y) - # if self.sample_dataset_per_tree and not self.permute_per_tree: - # raise ValueError("sample_dataset_per_tree is only valid when permute_per_tree=True") - if covariate_index is None: self.estimator_ = self._get_estimator() estimator = self.estimator_ diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index c16eecbe2..cecf34b8c 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -291,8 +291,10 @@ def test_pickle(tmpdir): assert_array_equal(getattr(clf, 
attr), getattr(clf_pickle, attr)) -@pytest.mark.parametrize("permute_per_tree", [True, False]) -@pytest.mark.parametrize("sample_dataset_per_tree", [True, False]) +@pytest.mark.parametrize("permute_per_tree", [True, False], ids=["permute_per_tree", "no_permute"]) +@pytest.mark.parametrize( + "sample_dataset_per_tree", [True, False], ids=["sample_dataset_per_tree", "no_sample_dataset"] +) def test_sample_size_consistency_of_estimator_indices_(permute_per_tree, sample_dataset_per_tree): """Test that the test-sample indices are what is expected.""" clf = FeatureImportanceForestClassifier( @@ -313,10 +315,25 @@ def test_sample_size_consistency_of_estimator_indices_(permute_per_tree, sample_ X, y, covariate_index=None, return_posteriors=True, metric="mi" ) if sample_dataset_per_tree: - assert_array_equal( - sorted(np.unique(samples)), - sorted(np.unique(np.concatenate([x[1] for x in clf.train_test_samples_]).flatten())), + # check the non-nans + non_nan_idx = _non_nan_samples(posteriors) + assert clf.n_samples_test_ == n_samples, f"{clf.n_samples_test_} != {n_samples}" + + sorted_sample_idx = sorted(np.unique(samples)) + sorted_est_samples_idx = sorted( + np.unique(np.concatenate([x[1] for x in clf.train_test_samples_]).flatten()) ) + assert_array_equal(sorted_sample_idx, non_nan_idx) + + # the sample indices are equal to the output of the train/test indices_ + # only if there are no nans in the posteriors over all the samples + if np.sum(non_nan_idx) == n_samples: + assert_array_equal( + sorted_sample_idx, + sorted_est_samples_idx, + f"{set(sorted_sample_idx) - set(sorted_est_samples_idx)} and " + f"{set(sorted_est_samples_idx) - set(sorted_sample_idx)}", + ) else: assert_array_equal(samples, sorted(clf.train_test_samples_[0][1])) assert len(_non_nan_samples(posteriors)) == len(samples) @@ -430,7 +447,7 @@ def test_small_dataset_dependent(seed): clf = FeatureImportanceForestClassifier( estimator=HonestForestClassifier( - n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.5 + n_estimators=50, random_state=seed, n_jobs=1, honest_fraction=0.5 ), test_size=0.2, permute_per_tree=False, diff --git a/sktree/tests/meson.build b/sktree/tests/meson.build index 6ccd37305..6fc67af5a 100644 --- a/sktree/tests/meson.build +++ b/sktree/tests/meson.build @@ -4,7 +4,8 @@ python_sources = [ 'test_unsupervised_forest.py', 'test_neighbors.py', 'test_honest_forest.py', - 'test_eiforest.py' + 'test_eiforest.py', + 'test_multiview_forest.py', ] py3.install_sources( diff --git a/sktree/tests/test_multiview_forest.py b/sktree/tests/test_multiview_forest.py new file mode 100644 index 000000000..dad4a7a14 --- /dev/null +++ b/sktree/tests/test_multiview_forest.py @@ -0,0 +1,89 @@ +import numpy as np +import pytest +from sklearn.datasets import make_blobs +from sklearn.metrics import accuracy_score +from sklearn.model_selection import cross_val_score +from sklearn.utils.estimator_checks import parametrize_with_checks + +from sktree import MultiViewRandomForestClassifier, RandomForestClassifier + +seed = 12345 + + +@parametrize_with_checks( + [ + MultiViewRandomForestClassifier(random_state=12345, n_estimators=10), + ] +) +def test_sklearn_compatible_estimator(estimator, check): + check(estimator) + + +@pytest.mark.parametrize("baseline_est", [MultiViewRandomForestClassifier, RandomForestClassifier]) +def test_multiview_classification(baseline_est): + """Test that explicit knowledge of multi-view structure improves classification accuracy. 
+ + In very high-dimensional noise setting across two views, when the max_depth and max_features + are constrained, the multi-view random forests will still obtain perfect accuracy, while + the single-view random forest will not. + """ + rng = np.random.default_rng(seed=seed) + + n_samples = 100 + n_estimators = 20 + n_features_1 = 5 + n_features_2 = 1000 + cluster_std = 2.0 + + # Create a high-dimensional multiview dataset with a low-dimensional informative + # subspace in one view of the dataset. + X0_first, y0 = make_blobs( + n_samples=n_samples, + cluster_std=cluster_std, + n_features=n_features_1, + random_state=rng.integers(1, 10000), + centers=1, + ) + + X1_first, y1 = make_blobs( + n_samples=n_samples, + cluster_std=cluster_std, + n_features=n_features_1, + random_state=rng.integers(1, 10000), + centers=1, + ) + y1[:] = 1 + X0 = np.concatenate([X0_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1) + X1 = np.concatenate([X1_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1) + X = np.vstack((X0, X1)) + y = np.hstack((y0, y1)).T + + # Compare multiview decision tree vs single-view decision tree + clf = MultiViewRandomForestClassifier( + random_state=seed, + feature_set_ends=[n_features_1, X.shape[1]], + max_features="sqrt", + max_depth=5, + n_estimators=n_estimators, + ) + clf.fit(X, y) + assert ( + accuracy_score(y, clf.predict(X)) == 1.0 + ), f"Accuracy score: {accuracy_score(y, clf.predict(X))}" + assert ( + cross_val_score(clf, X, y, cv=5).mean() == 1.0 + ), f"CV score: {cross_val_score(clf, X, y, cv=5).mean()}" + + clf = baseline_est( + random_state=seed, + max_depth=5, + max_features="sqrt", + n_estimators=n_estimators, + ) + clf.fit(X, y) + assert ( + accuracy_score(y, clf.predict(X)) == 1.0 + ), f"Accuracy score: {accuracy_score(y, clf.predict(X))}" + assert ( + cross_val_score(clf, X, y, cv=5).mean() <= 0.95 + ), f"CV score: {cross_val_score(clf, X, y, cv=5).mean()}" diff --git a/sktree/tests/test_supervised_forest.py b/sktree/tests/test_supervised_forest.py index 045653490..291d8a2e9 100644 --- a/sktree/tests/test_supervised_forest.py +++ b/sktree/tests/test_supervised_forest.py @@ -248,7 +248,8 @@ def test_oblique_forest_orthant(): random_state=0, ) - rc_clf = ObliqueRandomForestClassifier(max_features=None, random_state=0) + n_features = X.shape[1] + rc_clf = ObliqueRandomForestClassifier(max_features=int(n_features * 1.5), random_state=0) rc_clf.fit(X_train, y_train) y_hat = rc_clf.predict(X_test) rc_accuracy = accuracy_score(y_test, y_hat) @@ -258,7 +259,7 @@ def test_oblique_forest_orthant(): y_hat = ri_clf.predict(X_test) ri_accuracy = accuracy_score(y_test, y_hat) - assert rc_accuracy >= ri_accuracy + assert rc_accuracy >= ri_accuracy, f"{rc_accuracy} < {ri_accuracy}" assert ri_accuracy > 0.84 assert rc_accuracy > 0.85 @@ -442,4 +443,6 @@ def test_regression(forest, criterion, dtype): y = y_large_reg.astype(dtype, copy=False) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=n_test, random_state=123) estimator.fit(X_train, y_train) - assert estimator.score(X_test, y_test) > 0.88, f"Failed for {estimator} and {criterion}" + + test_score = estimator.score(X_test, y_test) + assert test_score > 0.85, f"Failed for {estimator} and {criterion}: {test_score}" diff --git a/sktree/tree/__init__.py b/sktree/tree/__init__.py index 1ed18f3f0..797338ac3 100644 --- a/sktree/tree/__init__.py +++ b/sktree/tree/__init__.py @@ -15,6 +15,7 @@ UnsupervisedObliqueDecisionTree, ) from ._honest_tree import HonestTreeClassifier +from 
._multiview import MultiViewDecisionTreeClassifier from ._neighbors import compute_forest_similarity_matrix __all__ = [ @@ -32,4 +33,5 @@ "DecisionTreeRegressor", "ExtraTreeClassifier", "ExtraTreeRegressor", + "MultiViewDecisionTreeClassifier", ] diff --git a/sktree/tree/_classes.py b/sktree/tree/_classes.py index a9e00c6d0..4763bb53d 100644 --- a/sktree/tree/_classes.py +++ b/sktree/tree/_classes.py @@ -53,6 +53,7 @@ OBLIQUE_DENSE_SPLITTERS = { "best": _oblique_splitter.BestObliqueSplitter, "random": _oblique_splitter.RandomObliqueSplitter, + "best-multiview": _oblique_splitter.MultiViewSplitter, } PATCH_DENSE_SPLITTERS = { @@ -1708,6 +1709,9 @@ def _build_tree( random_state : int, RandomState instance or None, default=None Controls the randomness of the estimator. """ + # Not used. Here for API compatibility + self.feature_combinations_ = 1 + if self.feature_weight is not None: self.feature_weight = self._validate_data( self.feature_weight, ensure_2d=True, dtype=DTYPE @@ -1797,6 +1801,7 @@ def _build_tree( min_weight_leaf, random_state, monotonic_cst, + self.feature_combinations_, self.min_patch_dims_, self.max_patch_dims_, self.dim_contiguous_, @@ -2170,6 +2175,9 @@ def _build_tree( random_state : int, RandomState instance or None, default=None Controls the randomness of the estimator. """ + # Not used. Here for API compatibility + self.feature_combinations_ = 1 + if self.feature_weight is not None: self.feature_weight = self._validate_data( self.feature_weight, ensure_2d=True, dtype=DTYPE @@ -2260,6 +2268,7 @@ def _build_tree( min_weight_leaf, random_state, monotonic_cst, + self.feature_combinations_, self.min_patch_dims_, self.max_patch_dims_, self.dim_contiguous_, diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py new file mode 100644 index 000000000..58b370db0 --- /dev/null +++ b/sktree/tree/_multiview.py @@ -0,0 +1,403 @@ +import copy +from numbers import Real + +import numpy as np +from scipy.sparse import issparse +from sklearn.utils._param_validation import Interval + +from .._lib.sklearn.tree import DecisionTreeClassifier, _criterion +from .._lib.sklearn.tree import _tree as _sklearn_tree +from .._lib.sklearn.tree._criterion import BaseCriterion +from .._lib.sklearn.tree._tree import BestFirstTreeBuilder, DepthFirstTreeBuilder +from . import _oblique_splitter +from ._neighbors import SimMatrixMixin +from ._oblique_splitter import ObliqueSplitter +from ._oblique_tree import ObliqueTree + +DTYPE = _sklearn_tree.DTYPE +DOUBLE = _sklearn_tree.DOUBLE + +CRITERIA_CLF = { + "gini": _criterion.Gini, + "log_loss": _criterion.Entropy, + "entropy": _criterion.Entropy, +} +CRITERIA_REG = { + "squared_error": _criterion.MSE, + "friedman_mse": _criterion.FriedmanMSE, + "absolute_error": _criterion.MAE, + "poisson": _criterion.Poisson, +} + +DENSE_SPLITTERS = { + "best": _oblique_splitter.MultiViewSplitter, +} + + +class MultiViewDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier): + """A multi-view axis-aligned decision tree classifier. + + This is an experimental feature that applies an oblique decision tree to + multiple feature-sets concatenated across columns in ``X``. + + Parameters + ---------- + criterion : {"gini", "entropy"}, default="gini" + The function to measure the quality of a split. Supported criteria are + "gini" for the Gini impurity and "entropy" for the information gain. + + splitter : {"best"}, default="best" + The strategy used to choose the split at each node. + + max_depth : int, default=None + The maximum depth of the tree. 
If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node: + + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. + + min_samples_leaf : int or float, default=1 + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + + min_weight_fraction_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. + + max_features : int, float or {"auto", "sqrt", "log2"}, default=None + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a fraction and + `int(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + Note: Compared to axis-aligned Random Forests, one can set + max_features to a number greater then ``n_features``. + + random_state : int, RandomState instance or None, default=None + Controls the randomness of the estimator. The features are always + randomly permuted at each split, even if ``splitter`` is set to + ``"best"``. When ``max_features < n_features``, the algorithm will + select ``max_features`` at random at each split before finding the best + split among them. But the best found split may vary across different + runs, even if ``max_features=n_features``. That is the case, if the + improvement of the criterion is identical for several splits and one + split has to be selected at random. To obtain a deterministic behaviour + during fitting, ``random_state`` has to be fixed to an integer. + See :term:`Glossary ` for details. + + max_leaf_nodes : int, default=None + Grow a tree with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. + + min_impurity_decrease : float, default=0.0 + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. 
+ + The weighted impurity decrease equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + + class_weight : dict, list of dict or "balanced", default=None + Weights associated with classes in the form ``{class_label: weight}``. + If None, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + For multi-output, the weights of each column of y will be multiplied. + + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + + feature_combinations : float, default=None + Not used. + + ccp_alpha : non-negative float, default=0.0 + Not used. + + store_leaf_values : bool, default=False + Whether to store the leaf values. + + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonicity constraint to enforce on each feature. + - 1: monotonic increase + - 0: no constraint + - -1: monotonic decrease + + Not used. + + feature_set_ends : array-like of int of shape (n_feature_sets,), default=None + The indices of the end of each feature set. For example, if the first + feature set is the first 10 features, and the second feature set is the + next 20 features, then ``feature_set_ends = [10, 30]``. If ``None``, + then this will assume that there is only one feature set. + + Attributes + ---------- + classes_ : ndarray of shape (n_classes,) or list of ndarray + The classes labels (single output problem), + or a list of arrays of class labels (multi-output problem). + + feature_importances_ : ndarray of shape (n_features,) + The impurity-based feature importances. + The higher, the more important the feature. + The importance of a feature is computed as the (normalized) + total reduction of the criterion brought by that feature. It is also + known as the Gini importance [4]_. + + Warning: impurity-based feature importances can be misleading for + high cardinality features (many unique values). See + :func:`sklearn.inspection.permutation_importance` as an alternative. + + max_features_ : int + The inferred value of max_features. + + n_classes_ : int or list of int + The number of classes (for single output problems), + or a list containing the number of classes for each + output (for multi-output problems). + + n_features_in_ : int + Number of features seen during :term:`fit`. + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + n_outputs_ : int + The number of outputs when ``fit`` is performed. 
+ + tree_ : Tree instance + The underlying Tree object. Please refer to + ``help(sklearn.tree._tree.Tree)`` for + attributes of Tree object. + + feature_combinations_ : float + The number of feature combinations on average taken to fit the tree. + + feature_set_ends_ : array-like of int of shape (n_feature_sets,) + The indices of the end of each feature set. + + n_feature_sets_ : int + The number of feature sets. + + See Also + -------- + sklearn.tree.DecisionTreeClassifier : An axis-aligned decision tree classifier. + """ + + tree_type = "oblique" + + _parameter_constraints = { + **DecisionTreeClassifier._parameter_constraints, + "feature_combinations": [ + Interval(Real, 1.0, None, closed="left"), + None, + ], + } + + def __init__( + self, + *, + criterion="gini", + splitter="best", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features=None, + random_state=None, + max_leaf_nodes=None, + min_impurity_decrease=0.0, + class_weight=None, + feature_combinations=None, + ccp_alpha=0.0, + store_leaf_values=False, + monotonic_cst=None, + feature_set_ends=None, + ): + super().__init__( + criterion=criterion, + splitter=splitter, + max_depth=max_depth, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + min_weight_fraction_leaf=min_weight_fraction_leaf, + max_features=max_features, + max_leaf_nodes=max_leaf_nodes, + class_weight=class_weight, + random_state=random_state, + min_impurity_decrease=min_impurity_decrease, + ccp_alpha=ccp_alpha, + store_leaf_values=store_leaf_values, + monotonic_cst=monotonic_cst, + ) + + self.feature_combinations = feature_combinations + self.feature_set_ends = feature_set_ends + + def _build_tree( + self, + X, + y, + sample_weight, + missing_values_in_feature_mask, + min_samples_leaf, + min_weight_leaf, + max_leaf_nodes, + min_samples_split, + max_depth, + random_state, + ): + """Build the actual tree. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The training input samples. Internally, it will be converted to + ``dtype=np.float32`` and if a sparse matrix is provided + to a sparse ``csc_matrix``. + + y : array-like of shape (n_samples,) or (n_samples, n_outputs) + The target values (class labels) as integers or strings. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. Splits + that would create child nodes with net zero or negative weight are + ignored while searching for a split in each node. Splits are also + ignored if they would result in any single class carrying a + negative weight in either child node. + + min_samples_leaf : int or float + The minimum number of samples required to be at a leaf node. + + min_weight_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights. + + max_leaf_nodes : int, default=None + Grow a tree with ``max_leaf_nodes`` in best-first fashion. + + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node. + + max_depth : int, default=None + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + + random_state : int, RandomState instance or None, default=None + Controls the randomness of the estimator. 
+ """ + monotonic_cst = None + _, n_features = X.shape + + self.feature_combinations_ = 1 + + # Build tree + criterion = self.criterion + if not isinstance(criterion, BaseCriterion): + criterion = CRITERIA_CLF[self.criterion](self.n_outputs_, self.n_classes_) + else: + # Make a deepcopy in case the criterion has mutable attributes that + # might be shared and modified concurrently during parallel fitting + criterion = copy.deepcopy(criterion) + + if self.feature_set_ends is None: + self.feature_set_ends_ = np.asarray([n_features], dtype=np.intp) + else: + self.feature_set_ends_ = np.atleast_1d(self.feature_set_ends).astype(np.intp) + self.n_feature_sets_ = len(self.feature_set_ends_) + + splitter = self.splitter + if issparse(X): + raise ValueError( + "Sparse input is not supported for oblique trees. " + "Please convert your data to a dense array." + ) + else: + SPLITTERS = DENSE_SPLITTERS + + if not isinstance(self.splitter, ObliqueSplitter): + splitter = SPLITTERS[self.splitter]( + criterion, + self.max_features_, + min_samples_leaf, + min_weight_leaf, + random_state, + monotonic_cst, + self.feature_combinations_, + self.feature_set_ends_, + self.n_feature_sets_, + ) + + self.tree_ = ObliqueTree(self.n_features_in_, self.n_classes_, self.n_outputs_) + + # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise + if max_leaf_nodes < 0: + self.builder_ = DepthFirstTreeBuilder( + splitter, + min_samples_split, + min_samples_leaf, + min_weight_leaf, + max_depth, + self.min_impurity_decrease, + ) + else: + self.builder_ = BestFirstTreeBuilder( + splitter, + min_samples_split, + min_samples_leaf, + min_weight_leaf, + max_depth, + max_leaf_nodes, + self.min_impurity_decrease, + ) + + self.builder_.build(self.tree_, X, y, sample_weight, None) + + if self.n_outputs_ == 1: + self.n_classes_ = self.n_classes_[0] + self.classes_ = self.classes_[0] diff --git a/sktree/tree/_oblique_splitter.pxd b/sktree/tree/_oblique_splitter.pxd index c9c833d90..3f7b84e43 100644 --- a/sktree/tree/_oblique_splitter.pxd +++ b/sktree/tree/_oblique_splitter.pxd @@ -20,6 +20,7 @@ from .._lib.sklearn.tree._tree cimport DTYPE_t # Type of X from .._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer from .._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters from .._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer +from .._lib.sklearn.utils._typedefs cimport intp_t from ._sklearn_splitter cimport sort @@ -88,6 +89,12 @@ cdef class BaseObliqueSplitter(Splitter): double upper_bound, ) except -1 nogil + cdef inline void fisher_yates_shuffle_memview( + self, + SIZE_t[::1] indices_to_sample, + SIZE_t grid_size, + UINT32_t* random_state + ) noexcept nogil cdef class ObliqueSplitter(BaseObliqueSplitter): # The splitter searches in the input space for a linear combination of features and a threshold @@ -106,3 +113,69 @@ cdef class ObliqueSplitter(BaseObliqueSplitter): vector[vector[DTYPE_t]]& proj_mat_weights, vector[vector[SIZE_t]]& proj_mat_indices ) noexcept nogil + + +cdef class BestObliqueSplitter(ObliqueSplitter): + cdef int node_split( + self, + double impurity, # Impurity of the node + SplitRecord* split, + SIZE_t* n_constant_features, + double lower_bound, + double upper_bound, + ) except -1 nogil + + +cdef class RandomObliqueSplitter(ObliqueSplitter): + cdef void find_min_max( + self, + DTYPE_t[::1] feature_values, + DTYPE_t* min_feature_value_out, + DTYPE_t* max_feature_value_out, + ) noexcept nogil + + cdef SIZE_t partition_samples( + self, + double current_threshold 
+ ) noexcept nogil
+
+ cdef int node_split(
+ self,
+ double impurity, # Impurity of the node
+ SplitRecord* split,
+ SIZE_t* n_constant_features,
+ double lower_bound,
+ double upper_bound,
+ ) except -1 nogil
+
+
+# XXX: This splitter is experimental. Expect changes frequently.
+cdef class MultiViewSplitter(BestObliqueSplitter):
+ cdef const intp_t[:] feature_set_ends # an array indicating the column indices of the end of each feature set
+ cdef intp_t n_feature_sets # the number of feature sets, i.e. the length of feature_set_ends
+
+ cdef vector[vector[SIZE_t]] multi_indices_to_sample
+
+ cdef void sample_proj_mat(
+ self,
+ vector[vector[DTYPE_t]]& proj_mat_weights,
+ vector[vector[SIZE_t]]& proj_mat_indices
+ ) noexcept nogil
+
+
+# XXX: This splitter is experimental. Expect changes frequently.
+cdef class MultiViewObliqueSplitter(BestObliqueSplitter):
+ cdef const intp_t[:] feature_set_ends # an array indicating the column indices of the end of each feature set
+ cdef intp_t n_feature_sets # the number of feature sets, i.e. the length of feature_set_ends
+
+ # whether or not to uniformly sample feature-sets into each projection vector
+ # if True, then sample from each feature set for each projection vector
+ cdef bint uniform_sampling
+
+ cdef vector[vector[SIZE_t]] multi_indices_to_sample
+
+ cdef void sample_proj_mat(
+ self,
+ vector[vector[DTYPE_t]]& proj_mat_weights,
+ vector[vector[SIZE_t]]& proj_mat_indices
+ ) noexcept nogil
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index bc29cafa7..92aa612a7 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -124,150 +124,19 @@ cdef class BaseObliqueSplitter(Splitter):
 feature_values[idx] = 0.0
 feature_values[idx] += self.X[samples[idx], col_idx] * col_weight
- cdef int node_split(
+ cdef inline void fisher_yates_shuffle_memview(
 self,
- double impurity,
- SplitRecord* split,
- SIZE_t* n_constant_features,
- double lower_bound,
- double upper_bound,
- ) except -1 nogil:
- """Find the best_split split on node samples[start:end]
-
- Returns -1 in case of failure to allocate memory (and raise MemoryError)
- or 0 otherwise.
- """ - # typecast the pointer to an ObliqueSplitRecord - cdef ObliqueSplitRecord* oblique_split = (split) - - # Draw random splits and pick the best_split - cdef SIZE_t[::1] samples = self.samples - cdef SIZE_t start = self.start - cdef SIZE_t end = self.end - - # pointer array to store feature values to split on - cdef DTYPE_t[::1] feature_values = self.feature_values - cdef SIZE_t max_features = self.max_features - cdef SIZE_t min_samples_leaf = self.min_samples_leaf - - # keep track of split record for current_split node and the best_split split - # found among the sampled projection vectors - cdef ObliqueSplitRecord best_split, current_split - cdef double current_proxy_improvement = -INFINITY - cdef double best_proxy_improvement = -INFINITY - - cdef SIZE_t feat_i, p # index over computed features and start/end - cdef SIZE_t partition_end - cdef DTYPE_t temp_d # to compute a projection feature value - - # instantiate the split records - _init_split(&best_split, end) - - # Sample the projection matrix - self.sample_proj_mat(self.proj_mat_weights, self.proj_mat_indices) - - # For every vector in the projection matrix - for feat_i in range(max_features): - # Projection vector has no nonzeros - if self.proj_mat_weights[feat_i].empty(): - continue - - # XXX: 'feature' is not actually used in oblique split records - # Just indicates which split was sampled - current_split.feature = feat_i - current_split.proj_vec_weights = &self.proj_mat_weights[feat_i] - current_split.proj_vec_indices = &self.proj_mat_indices[feat_i] - - # Compute linear combination of features and then - # sort samples according to the feature values. - self.compute_features_over_samples( - start, - end, - samples, - feature_values, - &self.proj_mat_weights[feat_i], - &self.proj_mat_indices[feat_i] - ) - - # Sort the samples - sort(&feature_values[start], &samples[start], end - start) - - # Evaluate all splits - self.criterion.reset() - p = start - while p < end: - while (p + 1 < end and feature_values[p + 1] <= feature_values[p] + FEATURE_THRESHOLD): - p += 1 - - p += 1 - - if p < end: - current_split.pos = p - - # Reject if min_samples_leaf is not guaranteed - if (((current_split.pos - start) < min_samples_leaf) or - ((end - current_split.pos) < min_samples_leaf)): - continue - - self.criterion.update(current_split.pos) - # Reject if min_weight_leaf is not satisfied - if self.check_postsplit_conditions() == 1: - continue - - current_proxy_improvement = \ - self.criterion.proxy_impurity_improvement() - - if current_proxy_improvement > best_proxy_improvement: - best_proxy_improvement = current_proxy_improvement - # sum of halves is used to avoid infinite value - current_split.threshold = feature_values[p - 1] / 2.0 + feature_values[p] / 2.0 - - if ( - (current_split.threshold == feature_values[p]) or - (current_split.threshold == INFINITY) or - (current_split.threshold == -INFINITY) - ): - current_split.threshold = feature_values[p - 1] - - best_split = current_split # copy - - # Reorganize into samples[start:best_split.pos] + samples[best_split.pos:end] - if best_split.pos < end: - partition_end = end - p = start - - while p < partition_end: - # Account for projection vector - temp_d = 0.0 - for j in range(best_split.proj_vec_indices.size()): - temp_d += self.X[samples[p], deref(best_split.proj_vec_indices)[j]] *\ - deref(best_split.proj_vec_weights)[j] - - if temp_d <= best_split.threshold: - p += 1 - - else: - partition_end -= 1 - samples[p], samples[partition_end] = \ - samples[partition_end], samples[p] - - 
self.criterion.reset()
- self.criterion.update(best_split.pos)
- self.criterion.children_impurity(&best_split.impurity_left,
- &best_split.impurity_right)
- best_split.improvement = self.criterion.impurity_improvement(
- impurity, best_split.impurity_left, best_split.impurity_right)
+ SIZE_t[::1] indices_to_sample,
+ SIZE_t grid_size,
+ UINT32_t* random_state,
+ ) noexcept nogil:
+ cdef int i, j
- # Return values
- deref(oblique_split).proj_vec_indices = best_split.proj_vec_indices
- deref(oblique_split).proj_vec_weights = best_split.proj_vec_weights
- deref(oblique_split).feature = best_split.feature
- deref(oblique_split).pos = best_split.pos
- deref(oblique_split).threshold = best_split.threshold
- deref(oblique_split).improvement = best_split.improvement
- deref(oblique_split).impurity_left = best_split.impurity_left
- deref(oblique_split).impurity_right = best_split.impurity_right
- return 0
+ # Note: drawing ``j`` from the half-open interval [i, grid_size) yields an unbiased
+ # Fisher-Yates shuffle; starting the draw at ``i + 1`` would instead give Sattolo's
+ # algorithm, which only generates cyclic permutations.
+ for i in range(0, grid_size - 1):
+ j = rand_int(i, grid_size, random_state)
+ indices_to_sample[j], indices_to_sample[i] = \
+ indices_to_sample[i], indices_to_sample[j]
 cdef class ObliqueSplitter(BaseObliqueSplitter):
 def __cinit__(
@@ -330,12 +199,6 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
 # or max w/ 1...
 self.n_non_zeros = max(int(self.max_features * self.feature_combinations), 1)
- def __getstate__(self):
- return {}
-
- def __setstate__(self, d):
- pass
-
 cdef int init(
 self,
 object X,
@@ -369,9 +232,9 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
 Parameters
 ----------
- proj_mat_weights : vector of vectors reference
+ proj_mat_weights : vector of vectors reference of shape (mtry, n_features)
 The memory address of projection matrix non-zero weights.
- proj_mat_indices : vector of vectors reference
+ proj_mat_indices : vector of vectors reference of shape (mtry, n_features)
 The memory address of projection matrix non-zero indices.
 Notes
@@ -380,7 +243,6 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
 it is assumed ``feature_combinations`` is forced to be smaller than
 ``n_features`` before instantiating an oblique splitter.
 """
- cdef SIZE_t n_features = self.n_features
 cdef SIZE_t n_non_zeros = self.n_non_zeros
 cdef UINT32_t* random_state = &self.rand_r_state
@@ -393,10 +255,7 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
 cdef SIZE_t grid_size = self.max_features * self.n_features
 # shuffle indices over the 2D grid to sample using Fisher-Yates
- for i in range(0, grid_size):
- j = rand_int(0, grid_size - i, random_state)
- indices_to_sample[j], indices_to_sample[i] = \
- indices_to_sample[i], indices_to_sample[j]
+ self.fisher_yates_shuffle_memview(indices_to_sample, grid_size, random_state)
 # sample 'n_non_zeros' in a mtry X n_features projection matrix
 # which consists of +/- 1's chosen at a 1/2s rate
@@ -404,7 +263,8 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
 # get the next index from the shuffled index array
 rand_vec_index = indices_to_sample[i]
- # get the projection index and feature index
+ # get the projection index (i.e. row of the projection matrix) and
+ # feature index (i.e.
column of the projection matrix) proj_i = rand_vec_index // n_features feat_i = rand_vec_index % n_features @@ -428,6 +288,151 @@ cdef class BestObliqueSplitter(ObliqueSplitter): self.feature_combinations, ), self.__getstate__()) + cdef int node_split( + self, + double impurity, + SplitRecord* split, + SIZE_t* n_constant_features, + double lower_bound, + double upper_bound, + ) except -1 nogil: + """Find the best_split split on node samples[start:end] + + Returns -1 in case of failure to allocate memory (and raise MemoryError) + or 0 otherwise. + """ + # typecast the pointer to an ObliqueSplitRecord + cdef ObliqueSplitRecord* oblique_split = (split) + + # Draw random splits and pick the best_split + cdef SIZE_t[::1] samples = self.samples + cdef SIZE_t start = self.start + cdef SIZE_t end = self.end + + # pointer array to store feature values to split on + cdef DTYPE_t[::1] feature_values = self.feature_values + cdef SIZE_t max_features = self.max_features + cdef SIZE_t min_samples_leaf = self.min_samples_leaf + + # keep track of split record for current_split node and the best_split split + # found among the sampled projection vectors + cdef ObliqueSplitRecord best_split, current_split + cdef double current_proxy_improvement = -INFINITY + cdef double best_proxy_improvement = -INFINITY + + cdef SIZE_t feat_i, p # index over computed features and start/end + cdef SIZE_t partition_end + cdef DTYPE_t temp_d # to compute a projection feature value + + # instantiate the split records + _init_split(&best_split, end) + + # Sample the projection matrix + self.sample_proj_mat(self.proj_mat_weights, self.proj_mat_indices) + + # For every vector in the projection matrix + for feat_i in range(max_features): + # Projection vector has no nonzeros + if self.proj_mat_weights[feat_i].empty(): + continue + + # XXX: 'feature' is not actually used in oblique split records + # Just indicates which split was sampled + current_split.feature = feat_i + current_split.proj_vec_weights = &self.proj_mat_weights[feat_i] + current_split.proj_vec_indices = &self.proj_mat_indices[feat_i] + + # Compute linear combination of features and then + # sort samples according to the feature values. 
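+ # (this fills feature_values[start:end] with the projection of each sample onto the
+ # current candidate vector, i.e. the sum of X[samples[i], idx_j] * w_j over the
+ # vector's non-zero (idx_j, w_j) entries)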
+ self.compute_features_over_samples( + start, + end, + samples, + feature_values, + &self.proj_mat_weights[feat_i], + &self.proj_mat_indices[feat_i] + ) + + # Sort the samples + sort(&feature_values[start], &samples[start], end - start) + + # Evaluate all splits + self.criterion.reset() + p = start + while p < end: + while (p + 1 < end and feature_values[p + 1] <= feature_values[p] + FEATURE_THRESHOLD): + p += 1 + + p += 1 + + if p < end: + current_split.pos = p + + # Reject if min_samples_leaf is not guaranteed + if (((current_split.pos - start) < min_samples_leaf) or + ((end - current_split.pos) < min_samples_leaf)): + continue + + self.criterion.update(current_split.pos) + # Reject if min_weight_leaf is not satisfied + if self.check_postsplit_conditions() == 1: + continue + + current_proxy_improvement = \ + self.criterion.proxy_impurity_improvement() + + if current_proxy_improvement > best_proxy_improvement: + best_proxy_improvement = current_proxy_improvement + # sum of halves is used to avoid infinite value + current_split.threshold = feature_values[p - 1] / 2.0 + feature_values[p] / 2.0 + + if ( + (current_split.threshold == feature_values[p]) or + (current_split.threshold == INFINITY) or + (current_split.threshold == -INFINITY) + ): + current_split.threshold = feature_values[p - 1] + + best_split = current_split # copy + + # Reorganize into samples[start:best_split.pos] + samples[best_split.pos:end] + if best_split.pos < end: + partition_end = end + p = start + + while p < partition_end: + # Account for projection vector + temp_d = 0.0 + for j in range(best_split.proj_vec_indices.size()): + temp_d += self.X[samples[p], deref(best_split.proj_vec_indices)[j]] *\ + deref(best_split.proj_vec_weights)[j] + + if temp_d <= best_split.threshold: + p += 1 + + else: + partition_end -= 1 + samples[p], samples[partition_end] = \ + samples[partition_end], samples[p] + + self.criterion.reset() + self.criterion.update(best_split.pos) + self.criterion.children_impurity(&best_split.impurity_left, + &best_split.impurity_right) + best_split.improvement = self.criterion.impurity_improvement( + impurity, best_split.impurity_left, best_split.impurity_right) + + # Return values + deref(oblique_split).proj_vec_indices = best_split.proj_vec_indices + deref(oblique_split).proj_vec_weights = best_split.proj_vec_weights + deref(oblique_split).feature = best_split.feature + deref(oblique_split).pos = best_split.pos + deref(oblique_split).threshold = best_split.threshold + deref(oblique_split).improvement = best_split.improvement + deref(oblique_split).impurity_left = best_split.impurity_left + deref(oblique_split).impurity_right = best_split.impurity_right + return 0 + cdef class RandomObliqueSplitter(ObliqueSplitter): def __reduce__(self): """Enable pickling the splitter.""" @@ -652,3 +657,403 @@ cdef class RandomObliqueSplitter(ObliqueSplitter): deref(oblique_split).impurity_left = best_split.impurity_left deref(oblique_split).impurity_right = best_split.impurity_right return 0 + + +cdef class MultiViewSplitter(BestObliqueSplitter): + def __cinit__( + self, + Criterion criterion, + SIZE_t max_features, + SIZE_t min_samples_leaf, + double min_weight_leaf, + object random_state, + const cnp.int8_t[:] monotonic_cst, + double feature_combinations, + const intp_t[:] feature_set_ends, + intp_t n_feature_sets, + *argv + ): + self.feature_set_ends = feature_set_ends + + # infer the number of feature sets + self.n_feature_sets = n_feature_sets + + def __getstate__(self): + return {} + + def __setstate__(self, d): + 
pass + + def __reduce__(self): + """Enable pickling the splitter.""" + return (type(self), + ( + self.criterion, + self.max_features, + self.min_samples_leaf, + self.min_weight_leaf, + self.random_state, + self.monotonic_cst.base if self.monotonic_cst is not None else None, + self.feature_combinations, + self.feature_set_ends.base if self.feature_set_ends is not None else None, + self.n_feature_sets, + ), self.__getstate__()) + + cdef int init( + self, + object X, + const DOUBLE_t[:, ::1] y, + const DOUBLE_t[:] sample_weight, + const unsigned char[::1] missing_values_in_feature_mask, + ) except -1: + Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) + + self.X = X + + # create a helper array for allowing efficient Fisher-Yates + self.multi_indices_to_sample = vector[vector[SIZE_t]](self.n_feature_sets) + + cdef SIZE_t i_feature = 0 + cdef SIZE_t feature_set_begin = 0 + cdef SIZE_t size_of_feature_set + cdef intp_t ifeat = 0 + for i_feature in range(self.n_feature_sets): + size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin + for ifeat in range(size_of_feature_set): + self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin) + + feature_set_begin = self.feature_set_ends[i_feature] + return 0 + + cdef void sample_proj_mat( + self, + vector[vector[DTYPE_t]]& proj_mat_weights, + vector[vector[SIZE_t]]& proj_mat_indices + ) noexcept nogil: + """Sample projection matrix accounting for multi-views. + + This proceeds as a normal sampling projection matrix, + but now also uniformly samples features from each feature set. + """ + cdef UINT32_t* random_state = &self.rand_r_state + cdef int feat_i, proj_i + cdef DTYPE_t weight + + # keep track of the beginning and ending indices of each feature set + cdef int idx + cdef int ifeature = 0 + cdef int grid_size + + # 03: Algorithm samples features from each set with equal probability + proj_i = 0 + cdef bint finished_feature_set = False + + cdef int i, j + while proj_i < self.max_features and not finished_feature_set: + # sample from a feature set + for idx in range(self.n_feature_sets): + # indices_to_sample = self.multi_indices_to_sample[idx] + grid_size = self.multi_indices_to_sample[idx].size() + + # Note: a temporary variable must occur + if proj_i == 0: + for i in range(0, self.multi_indices_to_sample[idx].size() - 1): + j = rand_int(i + 1, grid_size, random_state) + self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j] = self.multi_indices_to_sample[idx][j], self.multi_indices_to_sample[idx][i] + + if ifeature >= grid_size: + finished_feature_set = True + continue + + # sample random feature in this set + feat_i = self.multi_indices_to_sample[idx][ifeature] + + # here, axis-aligned splits are entirely weights of 1 + weight = 1 # if (rand_int(0, 2, random_state) == 1) else -1 + + proj_mat_indices[proj_i].push_back(feat_i) # Store index of nonzero + proj_mat_weights[proj_i].push_back(weight) # Store weight of nonzero + + proj_i += 1 + if proj_i >= self.max_features: + break + + finished_feature_set = False + + ifeature += 1 + +# XXX: not used right now +cdef class MultiViewObliqueSplitter(BestObliqueSplitter): + def __cinit__( + self, + Criterion criterion, + SIZE_t max_features, + SIZE_t min_samples_leaf, + double min_weight_leaf, + object random_state, + const cnp.int8_t[:] monotonic_cst, + double feature_combinations, + const intp_t[:] feature_set_ends, + intp_t n_feature_sets, + bint uniform_sampling, + *argv + ): + self.feature_set_ends = feature_set_ends + 
self.uniform_sampling = uniform_sampling + + # infer the number of feature sets + self.n_feature_sets = n_feature_sets + + def __reduce__(self): + """Enable pickling the splitter.""" + return (type(self), + ( + self.criterion, + self.max_features, + self.min_samples_leaf, + self.min_weight_leaf, + self.random_state, + self.monotonic_cst.base if self.monotonic_cst is not None else None, + self.feature_combinations, + self.feature_set_ends, + self.n_feature_sets, + self.uniform_sampling, + ), self.__getstate__()) + + cdef int init( + self, + object X, + const DOUBLE_t[:, ::1] y, + const DOUBLE_t[:] sample_weight, + const unsigned char[::1] missing_values_in_feature_mask, + ) except -1: + Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) + + self.X = X + + # create a helper array for allowing efficient Fisher-Yates + self.multi_indices_to_sample = vector[vector[SIZE_t]](self.n_feature_sets) + + cdef SIZE_t i_feature = 0 + cdef SIZE_t feature_set_begin = 0 + cdef SIZE_t size_of_feature_set + cdef intp_t ifeat = 0 + cdef intp_t iproj = 0 + while iproj < self.max_features: + for i_feature in range(self.n_feature_sets): + size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin + + for ifeat in range(size_of_feature_set): + self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin + (iproj * self.n_features)) + iproj += 1 + if iproj >= self.max_features: + break + if iproj >= self.max_features: + break + + feature_set_begin = self.feature_set_ends[i_feature] + return 0 + + cdef void sample_proj_mat( + self, + vector[vector[DTYPE_t]]& proj_mat_weights, + vector[vector[SIZE_t]]& proj_mat_indices + ) noexcept nogil: + """Sample projection matrix accounting for multi-views. + + This proceeds as a normal sampling projection matrix, + but now also uniformly samples features from each feature set. + """ + cdef SIZE_t n_features = self.n_features + cdef SIZE_t n_non_zeros = self.n_non_zeros + cdef UINT32_t* random_state = &self.rand_r_state + + cdef int i, j, feat_i, proj_i, rand_vec_index + cdef DTYPE_t weight + + # construct an array to sample from mTry x n_features set of indices + cdef vector[SIZE_t] indices_to_sample + cdef SIZE_t grid_size + + # compute the number of features in each feature set + cdef SIZE_t n_features_in_set + + # keep track of the beginning and ending indices of each feature set + cdef int feature_set_begin, feature_set_end, idx + feature_set_begin = 0 + + # keep track of number of features sampled relative to n_non_zeros + cdef int ifeature = 0 + + if self.uniform_sampling: + # 01: This algorithm samples features from each feature set uniformly and combines them + # into one sparse projection vector. 
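+ # Each pass of the loop below draws one randomly chosen index from every feature
+ # set's pre-computed candidate indices, so the sampled non-zeros are spread evenly
+ # across the feature sets.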
+ while ifeature < n_non_zeros: + for idx in range(self.n_feature_sets): + feature_set_end = self.feature_set_ends[idx] + n_features_in_set = feature_set_end - feature_set_begin + indices_to_sample = self.multi_indices_to_sample[idx] + grid_size = indices_to_sample.size() + + # shuffle indices over the 2D grid for this feature set to sample using Fisher-Yates + for i in range(0, grid_size): + j = rand_int(0, grid_size, random_state) + indices_to_sample[j], indices_to_sample[i] = \ + indices_to_sample[i], indices_to_sample[j] + + # sample a n_non_zeros matrix for each feature set, which proceeds by: + # - sample 'n_non_zeros' in a mtry X n_features projection matrix + # - which consists of +/- 1's chosen at a 1/2s rate + # for i in range(0, n_non_zeros_per_set): + # get the next index from the shuffled index array + rand_vec_index = indices_to_sample[0] + + # get the projection index (i.e. row of the projection matrix) and + # feature index (i.e. column of the projection matrix) + proj_i = rand_vec_index // n_features + feat_i = rand_vec_index % n_features + + # sample a random weight + weight = 1 if (rand_int(0, 2, random_state) == 1) else -1 + + proj_mat_indices[proj_i].push_back(feat_i) # Store index of nonzero + proj_mat_weights[proj_i].push_back(weight) # Store weight of nonzero + + # the new beginning is the previous end + feature_set_begin = feature_set_end + + ifeature += 1 + else: + # 02: Algorithm samples feature combinations from each feature set uniformly and evaluates + # them independently. + feature_set_begin = 0 + + # sample from a feature set + for idx in range(self.n_feature_sets): + feature_set_end = self.feature_set_ends[idx] + n_features_in_set = feature_set_end - feature_set_begin + + # indices to sample is a 1D-index array of size (max_features * n_features_in_set) + # which is Fisher-Yates shuffled to sample random features in each feature set + indices_to_sample = self.multi_indices_to_sample[idx] + grid_size = indices_to_sample.size() + + # shuffle indices over the 2D grid for this feature set to sample using Fisher-Yates + for i in range(0, grid_size): + j = rand_int(0, grid_size, random_state) + indices_to_sample[j], indices_to_sample[i] = \ + indices_to_sample[i], indices_to_sample[j] + + for i in range(0, n_non_zeros): + # get the next index from the shuffled index array + rand_vec_index = indices_to_sample[i] + + # get the projection index (i.e. row of the projection matrix) and + # feature index (i.e. column of the projection matrix) + proj_i = rand_vec_index // n_features + feat_i = rand_vec_index % n_features + + # sample a random weight + weight = 1 if (rand_int(0, 2, random_state) == 1) else -1 + + proj_mat_indices[proj_i].push_back(feat_i) # Store index of nonzero + proj_mat_weights[proj_i].push_back(weight) # Store weight of nonzero + + # the new beginning is the previous end + feature_set_begin = feature_set_end + + +cdef class MultiViewSplitterTester(MultiViewSplitter): + """A class to expose a Python interface for testing.""" + + cpdef sample_projection_matrix_py(self): + """Sample projection matrix using a patch. + + Used for testing purposes. + + Returns projection matrix of shape (max_features, n_features). 
+ """ + cdef vector[vector[DTYPE_t]] proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) + cdef vector[vector[SIZE_t]] proj_mat_indices = vector[vector[SIZE_t]](self.max_features) + cdef SIZE_t i, j + + # sample projection matrix in C/C++ + self.sample_proj_mat(proj_mat_weights, proj_mat_indices) + + # convert the projection matrix to something that can be used in Python + proj_vecs = np.zeros((self.max_features, self.n_features), dtype=np.float32) + for i in range(0, self.max_features): + for j in range(0, proj_mat_weights[i].size()): + weight = proj_mat_weights[i][j] + feat = proj_mat_indices[i][j] + + proj_vecs[i, feat] = weight + + return proj_vecs + + cpdef init_test(self, X, y, sample_weight, missing_values_in_feature_mask=None): + """Initializes the state of the splitter. + + Used for testing purposes. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The input samples. + y : array-like, shape (n_samples,) + The target values (class labels in classification, real numbers in + regression). + sample_weight : array-like, shape (n_samples,) + Sample weights. + missing_values_in_feature_mask : array-like, shape (n_features,) + Whether or not a feature has missing values. + """ + self.init(X, y, sample_weight, missing_values_in_feature_mask) + + +cdef class BestObliqueSplitterTester(BestObliqueSplitter): + """A class to expose a Python interface for testing.""" + + cpdef sample_projection_matrix_py(self): + """Sample projection matrix using a patch. + + Used for testing purposes. + + Returns projection matrix of shape (max_features, n_features). + """ + cdef vector[vector[DTYPE_t]] proj_mat_weights = vector[vector[DTYPE_t]](self.max_features) + cdef vector[vector[SIZE_t]] proj_mat_indices = vector[vector[SIZE_t]](self.max_features) + cdef SIZE_t i, j + + # sample projection matrix in C/C++ + self.sample_proj_mat(proj_mat_weights, proj_mat_indices) + + # convert the projection matrix to something that can be used in Python + proj_vecs = np.zeros((self.max_features, self.n_features), dtype=np.float32) + for i in range(0, self.max_features): + for j in range(0, proj_mat_weights[i].size()): + weight = proj_mat_weights[i][j] + feat = proj_mat_indices[i][j] + + proj_vecs[i, feat] = weight + + return proj_vecs + + cpdef init_test(self, X, y, sample_weight, missing_values_in_feature_mask=None): + """Initializes the state of the splitter. + + Used for testing purposes. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The input samples. + y : array-like, shape (n_samples,) + The target values (class labels in classification, real numbers in + regression). + sample_weight : array-like, shape (n_samples,) + Sample weights. + missing_values_in_feature_mask : array-like, shape (n_features,) + Whether or not a feature has missing values. 
+ """ + self.init(X, y, sample_weight, missing_values_in_feature_mask) diff --git a/sktree/tree/manifold/_morf_splitter.pxd b/sktree/tree/manifold/_morf_splitter.pxd index 9387d6797..a05bb50e8 100644 --- a/sktree/tree/manifold/_morf_splitter.pxd +++ b/sktree/tree/manifold/_morf_splitter.pxd @@ -19,7 +19,7 @@ from ..._lib.sklearn.tree._tree cimport DTYPE_t # Type of X from ..._lib.sklearn.tree._tree cimport INT32_t # Signed 32 bit integer from ..._lib.sklearn.tree._tree cimport SIZE_t # Type for indices and counters from ..._lib.sklearn.tree._tree cimport UINT32_t # Unsigned 32 bit integer -from .._oblique_splitter cimport BaseObliqueSplitter, ObliqueSplitRecord +from .._oblique_splitter cimport BestObliqueSplitter, ObliqueSplitRecord # https://github.com/cython/cython/blob/master/Cython/Includes/libcpp/algorithm.pxd # shows how to include standard library functions in Cython @@ -38,7 +38,7 @@ cdef extern from "" namespace "std" nogil: void swap[T](T& a, T& b) except + # array overload also works -cdef class PatchSplitter(BaseObliqueSplitter): +cdef class PatchSplitter(BestObliqueSplitter): # The PatchSplitter creates candidate feature values by sampling 2D patches from # an input data vector. The input data is vectorized, so `data_height` and # `data_width` are used to determine the vectorized indices corresponding to diff --git a/sktree/tree/manifold/_morf_splitter.pyx b/sktree/tree/manifold/_morf_splitter.pyx index 039d58008..95c37a2da 100644 --- a/sktree/tree/manifold/_morf_splitter.pyx +++ b/sktree/tree/manifold/_morf_splitter.pyx @@ -19,7 +19,7 @@ from ..._lib.sklearn.tree._criterion cimport Criterion from .._utils cimport ravel_multi_index_cython, unravel_index_cython -cdef class PatchSplitter(BaseObliqueSplitter): +cdef class PatchSplitter(BestObliqueSplitter): """Patch splitter. A convolutional 2D patch splitter. @@ -37,7 +37,7 @@ cdef class PatchSplitter(BaseObliqueSplitter): const DOUBLE_t[:] sample_weight, const unsigned char[::1] missing_values_in_feature_mask, ) except -1: - BaseObliqueSplitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) + BestObliqueSplitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) return 0 @@ -125,6 +125,7 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): double min_weight_leaf, object random_state, const cnp.int8_t[:] monotonic_cst, + double feature_combinations, const SIZE_t[:] min_patch_dims, const SIZE_t[:] max_patch_dims, const cnp.uint8_t[:] dim_contiguous, @@ -186,6 +187,7 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): self.min_weight_leaf, self.random_state, self.monotonic_cst.base if self.monotonic_cst is not None else None, + self.feature_combinations, self.min_patch_dims.base if self.min_patch_dims is not None else None, self.max_patch_dims.base if self.max_patch_dims is not None else None, self.dim_contiguous.base if self.dim_contiguous is not None else None, @@ -480,7 +482,7 @@ cdef class BestPatchSplitterTester(BestPatchSplitter): proj_vecs[i, feat] = weight return proj_vecs - cpdef sample_projection_matrix(self): + cpdef sample_projection_matrix_py(self): """Sample projection matrix using a patch. Used for testing purposes. 
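For reference, a minimal usage sketch of the estimator these splitters back, using only the public API added in this PR (``MultiViewDecisionTreeClassifier`` and its ``feature_set_ends`` argument); the data and parameter values below are illustrative assumptions, not part of the diff:

    import numpy as np
    from sktree.tree import MultiViewDecisionTreeClassifier

    rng = np.random.default_rng(0)
    X_view1 = rng.standard_normal((100, 5))   # first feature set: columns 0-4
    X_view2 = rng.standard_normal((100, 20))  # second feature set: columns 5-24
    X = np.hstack([X_view1, X_view2])
    y = (X_view1[:, 0] > 0).astype(int)

    # feature_set_ends marks the exclusive end column of each feature set
    clf = MultiViewDecisionTreeClassifier(
        feature_set_ends=[5, 25], max_features="sqrt", random_state=0
    )
    clf.fit(X, y)
    print(clf.n_feature_sets_)    # -> 2
    print(clf.feature_set_ends_)  # -> [ 5 25]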
diff --git a/sktree/tree/meson.build b/sktree/tree/meson.build
index 09ac76013..cc13bc1b4 100644
--- a/sktree/tree/meson.build
+++ b/sktree/tree/meson.build
@@ -19,6 +19,7 @@ endforeach
 python_sources = [
 '__init__.py',
 '_classes.py',
+ '_multiview.py',
 '_neighbors.py',
 '_honest_tree.py',
 '_marginalize.py',
diff --git a/sktree/tree/tests/meson.build b/sktree/tree/tests/meson.build
index e3d54ab85..c88eeda7c 100644
--- a/sktree/tree/tests/meson.build
+++ b/sktree/tree/tests/meson.build
@@ -6,6 +6,7 @@ python_sources = [
 'test_marginal.py',
 'test_all_trees.py',
 'test_unsupervised_tree.py',
+ 'test_multiview.py',
 ]
 py3.install_sources(
diff --git a/sktree/tree/tests/test_multiview.py b/sktree/tree/tests/test_multiview.py
new file mode 100644
index 000000000..cddfc661c
--- /dev/null
+++ b/sktree/tree/tests/test_multiview.py
@@ -0,0 +1,86 @@
+import numpy as np
+import pytest
+from sklearn.datasets import make_blobs
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import cross_val_score
+from sklearn.utils.estimator_checks import parametrize_with_checks
+
+from sktree.tree import DecisionTreeClassifier, MultiViewDecisionTreeClassifier
+
+seed = 12345
+
+
+@parametrize_with_checks(
+ [
+ MultiViewDecisionTreeClassifier(random_state=12),
+ ]
+)
+def test_sklearn_compatible_estimator(estimator, check):
+ check(estimator)
+
+
+@pytest.mark.parametrize("baseline_est", [MultiViewDecisionTreeClassifier, DecisionTreeClassifier])
+def test_multiview_classification(baseline_est):
+ """Test that explicit knowledge of multi-view structure improves classification accuracy.
+
+ In a very high-dimensional noise setting across two views, when the max_depth and max_features
+ are constrained, the multi-view decision tree will still obtain perfect accuracy, while
+ the single-view decision tree will not.
+ """
+ rng = np.random.default_rng(seed=seed)
+
+ n_samples = 20
+ n_features_1 = 5
+ n_features_2 = 1000
+ cluster_std = 2.0
+
+ # Create a high-dimensional multiview dataset with a low-dimensional informative
+ # subspace in one view of the dataset.
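+ # The class signal lives entirely in the first ``n_features_1`` columns (each class is
+ # drawn from its own Gaussian blob); the ``n_features_2`` columns appended afterwards
+ # are pure standard-normal noise for both classes.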
+ X0_first, y0 = make_blobs( + n_samples=n_samples, + cluster_std=cluster_std, + n_features=n_features_1, + random_state=rng.integers(1, 10000), + centers=1, + ) + + X1_first, y1 = make_blobs( + n_samples=n_samples, + cluster_std=cluster_std, + n_features=n_features_1, + random_state=rng.integers(1, 10000), + centers=1, + ) + y1[:] = 1 + X0 = np.concatenate([X0_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1) + X1 = np.concatenate([X1_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1) + X = np.vstack((X0, X1)) + y = np.hstack((y0, y1)).T + + # Compare multiview decision tree vs single-view decision tree + clf = MultiViewDecisionTreeClassifier( + random_state=seed, + feature_set_ends=[n_features_1, X.shape[1]], + max_features="sqrt", + max_depth=5, + ) + clf.fit(X, y) + assert ( + accuracy_score(y, clf.predict(X)) == 1.0 + ), f"Accuracy score: {accuracy_score(y, clf.predict(X))}" + assert ( + cross_val_score(clf, X, y, cv=5).mean() == 1.0 + ), f"CV score: {cross_val_score(clf, X, y, cv=5).mean()}" + + clf = baseline_est( + random_state=seed, + max_depth=5, + max_features="sqrt", + ) + clf.fit(X, y) + assert ( + accuracy_score(y, clf.predict(X)) == 1.0 + ), f"Accuracy score: {accuracy_score(y, clf.predict(X))}" + assert ( + cross_val_score(clf, X, y, cv=5).mean() <= 0.6 + ), f"CV score: {cross_val_score(clf, X, y, cv=5).mean()}" diff --git a/sktree/tree/tests/test_tree.py b/sktree/tree/tests/test_tree.py index 5bcd23c23..478aa6d5b 100644 --- a/sktree/tree/tests/test_tree.py +++ b/sktree/tree/tests/test_tree.py @@ -170,6 +170,7 @@ def test_pickle_splitters(): dim_contiguous = np.array((True, True)) data_dims = np.array((5, 5)) + feature_combinations = 1.5 splitter = BestObliqueSplitter( criterion, max_features, @@ -177,7 +178,7 @@ def test_pickle_splitters(): min_weight_leaf, random_state, monotonic_cst, - 1.5, + feature_combinations, ) with tempfile.TemporaryFile() as f: joblib.dump(splitter, f) @@ -189,7 +190,7 @@ def test_pickle_splitters(): min_weight_leaf, random_state, monotonic_cst, - 1.5, + feature_combinations, ) with tempfile.TemporaryFile() as f: joblib.dump(splitter, f) @@ -201,6 +202,7 @@ def test_pickle_splitters(): min_weight_leaf, random_state, monotonic_cst, + feature_combinations, min_patch_dims, max_patch_dims, dim_contiguous, @@ -471,7 +473,7 @@ def test_diabetes_overfit(name, Tree, criterion): "criterion, max_depth, metric, max_loss", [ ("squared_error", 15, mean_squared_error, 65), - ("absolute_error", 20, mean_squared_error, 60), + ("absolute_error", 25, mean_squared_error, 60), ("friedman_mse", 15, mean_squared_error, 65), ("poisson", 15, mean_poisson_deviance, 30), ], @@ -483,7 +485,9 @@ def test_diabetes_underfit(name, Tree, criterion, max_depth, metric, max_loss): reg = Tree(criterion=criterion, max_depth=max_depth, max_features=10, random_state=1234) reg.fit(diabetes.data, diabetes.target) loss = metric(diabetes.target, reg.predict(diabetes.data)) - assert 0.0 <= loss < max_loss + assert ( + 0.0 <= loss < max_loss + ), f"Failed with {name}, criterion = {criterion} and loss = {loss}, and max_loss: {max_loss}" def test_numerical_stability(): diff --git a/sktree/tree/tests/test_utils.py b/sktree/tree/tests/test_utils.py index 3ba69adbc..3cf6705bf 100644 --- a/sktree/tree/tests/test_utils.py +++ b/sktree/tree/tests/test_utils.py @@ -100,6 +100,8 @@ def test_best_patch_splitter_contiguous(): # monotonic constraints are not supported for oblique splits monotonic_cst = None + feature_combinations = 1.5 + splitter = 
BestPatchSplitterTester( criterion, max_features, @@ -107,6 +109,7 @@ def test_best_patch_splitter_contiguous(): min_weight_leaf, random_state, monotonic_cst, + feature_combinations, min_patch_dims, max_patch_dims, dim_contiguous, diff --git a/sktree/tree/unsupervised/_unsup_criterion.pxd b/sktree/tree/unsupervised/_unsup_criterion.pxd index 47967841a..bfbd7428a 100644 --- a/sktree/tree/unsupervised/_unsup_criterion.pxd +++ b/sktree/tree/unsupervised/_unsup_criterion.pxd @@ -2,8 +2,6 @@ # cython: wraparound=False # cython: language_level=3 -from libcpp.unordered_map cimport unordered_map - from ..._lib.sklearn.tree._criterion cimport BaseCriterion from ..._lib.sklearn.tree._tree cimport DOUBLE_t # Type of y, sample_weight from ..._lib.sklearn.tree._tree cimport DTYPE_t # Type of X @@ -47,11 +45,6 @@ cdef class UnsupervisedCriterion(BaseCriterion): cdef double sumsq_left # Same as above, but for the left side of the split cdef double sumsq_right # Same as above, but for the right side of the split - # use memoization to re-compute variance of any subsegment in O(1) - # cdef unordered_map[SIZE_t, DTYPE_t] cumsum_of_squares_map - # cdef unordered_map[SIZE_t, DTYPE_t] cumsum_map - # cdef unordered_map[SIZE_t, DTYPE_t] cumsum_weights_map - # Methods # ------- # The 'init' method is copied here with the almost the exact same signature diff --git a/sktree/tree/unsupervised/_unsup_criterion.pyx b/sktree/tree/unsupervised/_unsup_criterion.pyx index 87874b800..518fc2753 100644 --- a/sktree/tree/unsupervised/_unsup_criterion.pyx +++ b/sktree/tree/unsupervised/_unsup_criterion.pyx @@ -78,11 +78,6 @@ cdef class UnsupervisedCriterion(BaseCriterion): # and then update will never have to iterate through even cdef DOUBLE_t w = 1.0 - # cdef SIZE_t prev_s_idx = -1 - # self.cumsum_of_squares_map[prev_s_idx] = 0.0 - # self.cumsum_map[prev_s_idx] = 0.0 - # self.cumsum_weights_map[prev_s_idx] = 0.0 - for p_idx in range(self.start, self.end): s_idx = self.sample_indices[p_idx]