From 3ddb98b680385b69ff4f47b3fbbb84f8895ab892 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Thu, 14 Mar 2024 13:05:07 -0400
Subject: [PATCH 01/26] Fix multiview API and enable oblique multiview

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/ensemble/_multiview.py     |  13 --
 sktree/tree/_multiview.py         |  17 +--
 sktree/tree/_oblique_splitter.pxd |  11 +-
 sktree/tree/_oblique_splitter.pyx | 241 +++++++-----------------------
 4 files changed, 60 insertions(+), 222 deletions(-)

diff --git a/sktree/ensemble/_multiview.py b/sktree/ensemble/_multiview.py
index f1102b66a..8f9c52971 100644
--- a/sktree/ensemble/_multiview.py
+++ b/sktree/ensemble/_multiview.py
@@ -159,16 +159,6 @@ class MultiViewRandomForestClassifier(
         - If float, then draw `max_samples * X.shape[0]` samples. Thus,
           `max_samples` should be in the interval `(0.0, 1.0]`.
 
-    feature_combinations : float, default=None
-        The number of features to combine on average at each split
-        of the decision trees. If ``None``, then will default to the minimum of
-        ``(1.5, n_features)``. This controls the number of non-zeros is the
-        projection matrix. Setting the value to 1.0 is equivalent to a
-        traditional decision-tree. ``feature_combinations * max_features``
-        gives the number of expected non-zeros in the projection matrix of shape
-        ``(max_features, n_features)``. Thus this value must always be less than
-        ``n_features`` in order to be valid.
-
     feature_set_ends : array-like of int of shape (n_feature_sets,), default=None
         The indices of the end of each feature set. For example, if the first
         feature set is the first 10 features, and the second feature set is the
@@ -270,7 +260,6 @@ def __init__(
         warm_start=False,
         class_weight=None,
         max_samples=None,
-        feature_combinations=None,
         feature_set_ends=None,
         apply_max_features_per_feature_set=False,
     ):
@@ -287,7 +276,6 @@ def __init__(
                 "max_leaf_nodes",
                 "min_impurity_decrease",
                 "random_state",
-                "feature_combinations",
                 "feature_set_ends",
                 "apply_max_features_per_feature_set",
             ),
@@ -305,7 +293,6 @@ def __init__(
         self.min_samples_split = min_samples_split
         self.min_samples_leaf = min_samples_leaf
         self.max_features = max_features
-        self.feature_combinations = feature_combinations
         self.feature_set_ends = feature_set_ends
         self.apply_max_features_per_feature_set = apply_max_features_per_feature_set
 
diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index 52b70c0df..a01ba986c 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -158,9 +158,6 @@ class MultiViewDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
         Note that these weights will be multiplied with sample_weight (passed
         through the fit method) if sample_weight is specified.
 
-    feature_combinations : float, default=None
-        Not used.
-
     ccp_alpha : non-negative float, default=0.0
         Not used.
 
@@ -226,9 +223,6 @@ class MultiViewDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
         ``help(sklearn.tree._tree.Tree)`` for
         attributes of Tree object.
 
-    feature_combinations_ : float
-        The number of feature combinations on average taken to fit the tree.
-
     feature_set_ends_ : array-like of int of shape (n_feature_sets,)
         The indices of the end of each feature set.
 
@@ -248,10 +242,6 @@ class MultiViewDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
 
     _parameter_constraints = {
         **DecisionTreeClassifier._parameter_constraints,
-        "feature_combinations": [
-            Interval(Real, 1.0, None, closed="left"),
-            None,
-        ],
         "feature_set_ends": ["array-like", None],
         "apply_max_features_per_feature_set": ["boolean"],
     }
@@ -278,7 +268,6 @@ def __init__(
         max_leaf_nodes=None,
         min_impurity_decrease=0.0,
         class_weight=None,
-        feature_combinations=None,
         ccp_alpha=0.0,
         store_leaf_values=False,
         monotonic_cst=None,
@@ -302,7 +291,6 @@ def __init__(
             monotonic_cst=monotonic_cst,
         )
 
-        self.feature_combinations = feature_combinations
         self.feature_set_ends = feature_set_ends
         self.apply_max_features_per_feature_set = apply_max_features_per_feature_set
         self._max_features_arr = None
@@ -362,7 +350,7 @@ def _build_tree(
         monotonic_cst = None
         _, n_features = X.shape
 
-        self.feature_combinations_ = 1
+        self._feature_combinations_ = 1
 
         # Build tree
         criterion = self.criterion
@@ -485,7 +473,7 @@ def _build_tree(
                 min_weight_leaf,
                 random_state,
                 monotonic_cst,
-                self.feature_combinations_,
+                self._feature_combinations_,
                 self.feature_set_ends_,
                 self.n_feature_sets_,
                 self.max_features_per_set_,
@@ -584,7 +572,6 @@ def _inheritable_fitted_attribute(self):
         """
         return [
             "max_features_",
-            "feature_combinations_",
             "feature_set_ends_",
             "n_feature_sets_",
             "n_features_in_set_",
diff --git a/sktree/tree/_oblique_splitter.pxd b/sktree/tree/_oblique_splitter.pxd
index 3f17b8c6b..ea5187c09 100644
--- a/sktree/tree/_oblique_splitter.pxd
+++ b/sktree/tree/_oblique_splitter.pxd
@@ -164,15 +164,8 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
 
 
 # XXX: This splitter is experimental. Expect changes frequently.
-cdef class MultiViewObliqueSplitter(BestObliqueSplitter):
-    cdef const intp_t[:] feature_set_ends   # an array indicating the column indices of the end of each feature set
-    cdef intp_t n_feature_sets                  # the number of feature sets is the length of feature_set_ends + 1
-
-    # whether or not to uniformly sample feature-sets into each projection vector
-    # if True, then sample from each feature set for each projection vector
-    cdef bint uniform_sampling
-
-    cdef vector[vector[intp_t]] multi_indices_to_sample
+cdef class MultiViewObliqueSplitter(MultiViewSplitter):
+    cdef const intp_t[:] n_non_zeros_per_set  # the number of non-zero features in each feature set
 
     cdef void sample_proj_mat(
         self,
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index 23b6e722d..e6d978658 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -753,34 +753,26 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
 
         # 01: Algorithm samples features from each set equally with the same number
         # of candidates, but if one feature set is exhausted, then that one is no longer sampled
-        cdef intp_t finished_feature_set_count = 0
-        cdef bint finished_feature_sets = False
         cdef intp_t i, j
 
         proj_i = 0
 
-        if self.max_features_per_set is None:
-            while proj_i < self.max_features and not finished_feature_sets:
-                finished_feature_sets = False
-                finished_feature_set_count = 0
-
-                # sample from a feature set
-                for idx in range(self.n_feature_sets):
-                    # indices_to_sample = self.multi_indices_to_sample[idx]
-                    grid_size = self.multi_indices_to_sample[idx].size()
-
-                    # Note: a temporary variable must not be used, else a copy will be made
-                    if proj_i == 0:
-                        for i in range(0, self.multi_indices_to_sample[idx].size() - 1):
-                            j = rand_int(i + 1, grid_size, random_state)
-                            self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j] = \
-                                self.multi_indices_to_sample[idx][j], self.multi_indices_to_sample[idx][i]
-
-                    # keep track of which feature-sets are exhausted
-                    if ifeature >= grid_size:
-                        finished_feature_set_count += 1
-                        continue
+        # 02: Algorithm samples a different number of features from each set, but considers
+        # each feature-set equally
+        while proj_i < self.max_features:
+            # sample from a feature set
+            for idx in range(self.n_feature_sets):
+                # get the max-features for this feature-set
+                max_features = self.max_features_per_set[idx]
 
+                grid_size = self.multi_indices_to_sample[idx].size()
+                # Note: a temporary variable must not be used, else a copy will be made
+                for i in range(0, self.multi_indices_to_sample[idx].size() - 1):
+                    j = rand_int(i + 1, grid_size, random_state)
+                    self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j] = \
+                        self.multi_indices_to_sample[idx][j], self.multi_indices_to_sample[idx][i]
+
+                for ifeature in range(max_features):
                     # sample random feature in this set
                     feat_i = self.multi_indices_to_sample[idx][ifeature]
 
@@ -793,45 +785,11 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
                     proj_i += 1
                     if proj_i >= self.max_features:
                         break
+                if proj_i >= self.max_features:
+                    break
 
-                if finished_feature_set_count == self.n_feature_sets:
-                    finished_feature_sets = True
 
-                ifeature += 1
-        # 02: Algorithm samples a different number features from each set, but considers
-        # each feature-set equally
-        else:
-            while proj_i < self.max_features:
-                # sample from a feature set
-                for idx in range(self.n_feature_sets):
-                    # get the max-features for this feature-set
-                    max_features = self.max_features_per_set[idx]
-
-                    grid_size = self.multi_indices_to_sample[idx].size()
-                    # Note: a temporary variable must not be used, else a copy will be made
-                    for i in range(0, self.multi_indices_to_sample[idx].size() - 1):
-                        j = rand_int(i + 1, grid_size, random_state)
-                        self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j] = \
-                            self.multi_indices_to_sample[idx][j], self.multi_indices_to_sample[idx][i]
-
-                    for ifeature in range(max_features):
-                        # sample random feature in this set
-                        feat_i = self.multi_indices_to_sample[idx][ifeature]
-
-                        # here, axis-aligned splits are entirely weights of 1
-                        weight = 1  # if (rand_int(0, 2, random_state) == 1) else -1
-
-                        proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
-                        proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
-
-                        proj_i += 1
-                        if proj_i >= self.max_features:
-                            break
-                    if proj_i >= self.max_features:
-                        break
-
-# XXX: not used right now
-cdef class MultiViewObliqueSplitter(BestObliqueSplitter):
+cdef class MultiViewObliqueSplitter(MultiViewSplitter):
     def __cinit__(
         self,
         Criterion criterion,
@@ -843,64 +801,23 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter):
         float64_t feature_combinations,
         const intp_t[:] feature_set_ends,
         intp_t n_feature_sets,
-        bint uniform_sampling,
+        const intp_t[:] max_features_per_set,
         *argv
     ):
         self.feature_set_ends = feature_set_ends
-        self.uniform_sampling = uniform_sampling
 
         # infer the number of feature sets
         self.n_feature_sets = n_feature_sets
 
-    def __reduce__(self):
-        """Enable pickling the splitter."""
-        return (type(self),
-                (
-                    self.criterion,
-                    self.max_features,
-                    self.min_samples_leaf,
-                    self.min_weight_leaf,
-                    self.random_state,
-                    self.monotonic_cst.base if self.monotonic_cst is not None else None,
-                    self.feature_combinations,
-                    self.feature_set_ends,
-                    self.n_feature_sets,
-                    self.uniform_sampling,
-                ), self.__getstate__())
-
-    cdef int init(
-        self,
-        object X,
-        const float64_t[:, ::1] y,
-        const float64_t[:] sample_weight,
-        const unsigned char[::1] missing_values_in_feature_mask,
-    ) except -1:
-        Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
-
-        self.X = X
-
-        # create a helper array for allowing efficient Fisher-Yates
-        self.multi_indices_to_sample = vector[vector[intp_t]](self.n_feature_sets)
-
-        cdef intp_t i_feature = 0
-        cdef intp_t feature_set_begin = 0
-        cdef intp_t size_of_feature_set
-        cdef intp_t ifeat = 0
-        cdef intp_t iproj = 0
-        while iproj < self.max_features:
-            for i_feature in range(self.n_feature_sets):
-                size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin
-
-                for ifeat in range(size_of_feature_set):
-                    self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin + (iproj * self.n_features))
-                    iproj += 1
-                    if iproj >= self.max_features:
-                        break
-                if iproj >= self.max_features:
-                    break
+        # replaces usage of max_features
+        self.max_features_per_set = max_features_per_set
 
-            feature_set_begin = self.feature_set_ends[i_feature]
-        return 0
+        # compute # of non-zeros expected on average per feature set
+        cdef intp_t[:] n_non_zeros_per_set = np.zeros(self.n_feature_sets, dtype=np.intp)
+        cdef intp_t i
+        for i in range(self.n_feature_sets):
+            n_non_zeros_per_set[i] = <intp_t> (self.max_features_per_set[i] * self.feature_combinations)
+        self.n_non_zeros_per_set = n_non_zeros_per_set
 
     cdef void sample_proj_mat(
         self,
@@ -913,7 +830,6 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter):
         but now also uniformly samples features from each feature set.
         """
         cdef intp_t n_features = self.n_features
-        cdef intp_t n_non_zeros = self.n_non_zeros
         cdef UINT32_t* random_state = &self.rand_r_state
 
         cdef intp_t i, j, feat_i, proj_i, rand_vec_index
@@ -923,92 +839,47 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter):
         cdef vector[intp_t] indices_to_sample
         cdef intp_t grid_size
 
-        # compute the number of features in each feature set
-        cdef intp_t n_features_in_set
-
         # keep track of the beginning and ending indices of each feature set
         cdef intp_t feature_set_begin, feature_set_end, idx
         feature_set_begin = 0
 
-        # keep track of number of features sampled relative to n_non_zeros
-        cdef intp_t ifeature = 0
-
-        if self.uniform_sampling:
-            # 01: This algorithm samples features from each feature set uniformly and combines them
-            # into one sparse projection vector.
-            while ifeature < n_non_zeros:
-                for idx in range(self.n_feature_sets):
-                    feature_set_end = self.feature_set_ends[idx]
-                    n_features_in_set = feature_set_end - feature_set_begin
-                    indices_to_sample = self.multi_indices_to_sample[idx]
-                    grid_size = indices_to_sample.size()
-
-                    # shuffle indices over the 2D grid for this feature set to sample using Fisher-Yates
-                    for i in range(0, grid_size):
-                        j = rand_int(0, grid_size, random_state)
-                        indices_to_sample[j], indices_to_sample[i] = \
-                            indices_to_sample[i], indices_to_sample[j]
-
-                    # sample a n_non_zeros matrix for each feature set, which proceeds by:
-                    # - sample 'n_non_zeros' in a mtry X n_features projection matrix
-                    # - which consists of +/- 1's chosen at a 1/2s rate
-                    # for i in range(0, n_non_zeros_per_set):
-                    # get the next index from the shuffled index array
-                    rand_vec_index = indices_to_sample[0]
-
-                    # get the projection index (i.e. row of the projection matrix) and
-                    # feature index (i.e. column of the projection matrix)
-                    proj_i = rand_vec_index // n_features
-                    feat_i = rand_vec_index % n_features
-
-                    # sample a random weight
-                    weight = 1 if (rand_int(0, 2, random_state) == 1) else -1
-
-                    proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
-                    proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
-
-                    # the new beginning is the previous end
-                    feature_set_begin = feature_set_end
-
-                    ifeature += 1
-        else:
-            # 02: Algorithm samples feature combinations from each feature set uniformly and evaluates
-            # them independently.
-            feature_set_begin = 0
+        # 02: Algorithm samples feature combinations from each feature set uniformly and evaluates
+        # them independently.
+        feature_set_begin = 0
 
-            # sample from a feature set
-            for idx in range(self.n_feature_sets):
-                feature_set_end = self.feature_set_ends[idx]
-                n_features_in_set = feature_set_end - feature_set_begin
+        # sample from a feature set using linear combinations among the two sets
+        for idx in range(self.n_feature_sets):
+            feature_set_end = self.feature_set_ends[idx]
 
-                # indices to sample is a 1D-index array of size (max_features * n_features_in_set)
-                # which is Fisher-Yates shuffled to sample random features in each feature set
-                indices_to_sample = self.multi_indices_to_sample[idx]
-                grid_size = indices_to_sample.size()
+            # indices to sample is a 1D-index array of size (max_features * n_features_in_set)
+            # which is Fisher-Yates shuffled to sample random features in each feature set
+            indices_to_sample = self.multi_indices_to_sample[idx]
+            grid_size = indices_to_sample.size()
 
-                # shuffle indices over the 2D grid for this feature set to sample using Fisher-Yates
-                for i in range(0, grid_size):
-                    j = rand_int(0, grid_size, random_state)
-                    indices_to_sample[j], indices_to_sample[i] = \
-                        indices_to_sample[i], indices_to_sample[j]
+            # shuffle indices over the 2D grid for this feature set to sample using Fisher-Yates
+            for i in range(0, grid_size):
+                j = rand_int(0, grid_size, random_state)
+                indices_to_sample[j], indices_to_sample[i] = \
+                    indices_to_sample[i], indices_to_sample[j]
 
-                for i in range(0, n_non_zeros):
-                    # get the next index from the shuffled index array
-                    rand_vec_index = indices_to_sample[i]
+            # take this feature set's own budget of non-zeros (n_non_zeros_per_set[idx])
+            for i in range(0, self.n_non_zeros_per_set[idx]):
+                # get the next index from the shuffled index array
+                rand_vec_index = indices_to_sample[i]
 
-                    # get the projection index (i.e. row of the projection matrix) and
-                    # feature index (i.e. column of the projection matrix)
-                    proj_i = rand_vec_index // n_features
-                    feat_i = rand_vec_index % n_features
+                # get the projection index (i.e. row of the projection matrix) and
+                # feature index (i.e. column of the projection matrix)
+                proj_i = rand_vec_index // n_features
+                feat_i = rand_vec_index % n_features
 
-                    # sample a random weight
-                    weight = 1 if (rand_int(0, 2, random_state) == 1) else -1
+                # sample a random weight
+                weight = 1 if (rand_int(0, 2, random_state) == 1) else -1
 
-                    proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
-                    proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
+                proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
+                proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
 
-                # the new beginning is the previous end
-                feature_set_begin = feature_set_end
+            # the new beginning is the previous end
+            feature_set_begin = feature_set_end
 
 
 cdef class MultiViewSplitterTester(MultiViewSplitter):

From 923c1711c280516b48ca9ae7aca2bbd5dacff919 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Thu, 14 Mar 2024 13:11:49 -0400
Subject: [PATCH 02/26] Clean up other unused kwarg path

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/ensemble/_multiview.py         |   8 --
 sktree/stats/tests/test_forestht.py   |   1 -
 sktree/tests/test_multiview_forest.py |   1 -
 sktree/tree/_multiview.py             | 112 ++++++++++----------------
 sktree/tree/tests/test_multiview.py   |  26 ++----
 5 files changed, 47 insertions(+), 101 deletions(-)

diff --git a/sktree/ensemble/_multiview.py b/sktree/ensemble/_multiview.py
index 8f9c52971..828212335 100644
--- a/sktree/ensemble/_multiview.py
+++ b/sktree/ensemble/_multiview.py
@@ -165,11 +165,6 @@ class MultiViewRandomForestClassifier(
         next 20 features, then ``feature_set_ends = [10, 30]``. If ``None``,
         then this will assume that there is only one feature set.
 
-    apply_max_features_per_feature_set : bool, default=False
-        Whether to apply sampling per feature set, where ``max_features`` is applied
-        to each feature-set. If ``False``, then sampling
-        is applied over the entire feature space.
-
     Attributes
     ----------
     estimators_ : list of sktree.tree.ObliqueDecisionTreeClassifier
@@ -261,7 +256,6 @@ def __init__(
         class_weight=None,
         max_samples=None,
         feature_set_ends=None,
-        apply_max_features_per_feature_set=False,
     ):
         super().__init__(
             estimator=MultiViewDecisionTreeClassifier(),
@@ -277,7 +271,6 @@ def __init__(
                 "min_impurity_decrease",
                 "random_state",
                 "feature_set_ends",
-                "apply_max_features_per_feature_set",
             ),
             bootstrap=bootstrap,
             oob_score=oob_score,
@@ -294,7 +287,6 @@ def __init__(
         self.min_samples_leaf = min_samples_leaf
         self.max_features = max_features
         self.feature_set_ends = feature_set_ends
-        self.apply_max_features_per_feature_set = apply_max_features_per_feature_set
 
         # unused by oblique forests
         self.min_weight_fraction_leaf = min_weight_fraction_leaf
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 091eff99a..af4193ebf 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -710,7 +710,6 @@ def test_comight_repeated_feature_sets():
             tree_estimator=MultiViewDecisionTreeClassifier(
                 feature_set_ends=feature_set_ends,
                 max_features=0.3,
-                apply_max_features_per_feature_set=True,
             ),
         ),
         test_size=0.2,
diff --git a/sktree/tests/test_multiview_forest.py b/sktree/tests/test_multiview_forest.py
index 95119b580..da168bbba 100644
--- a/sktree/tests/test_multiview_forest.py
+++ b/sktree/tests/test_multiview_forest.py
@@ -150,7 +150,6 @@ def test_three_view_dataset(n_views, max_features):
     clf = MultiViewRandomForestClassifier(
         random_state=seed,
         feature_set_ends=feature_set_ends,
-        apply_max_features_per_feature_set=True,
         max_features=max_features,
         n_estimators=n_estimators,
     )
diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index a01ba986c..81aa24963 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -178,11 +178,6 @@ class MultiViewDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
         next 20 features, then ``feature_set_ends = [10, 30]``. If ``None``,
         then this will assume that there is only one feature set.
 
-    apply_max_features_per_feature_set : bool, default=False
-        Whether to apply sampling per feature set, where ``max_features`` is applied
-        to each feature-set. If ``False``, then sampling
-        is applied over the entire feature space.
-
     Attributes
     ----------
     classes_ : ndarray of shape (n_classes,) or list of ndarray
@@ -243,7 +238,6 @@ class MultiViewDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
     _parameter_constraints = {
         **DecisionTreeClassifier._parameter_constraints,
         "feature_set_ends": ["array-like", None],
-        "apply_max_features_per_feature_set": ["boolean"],
     }
     _parameter_constraints.pop("max_features")
     _parameter_constraints["max_features"] = [
@@ -272,7 +266,6 @@ def __init__(
         store_leaf_values=False,
         monotonic_cst=None,
         feature_set_ends=None,
-        apply_max_features_per_feature_set=False,
     ):
         super().__init__(
             criterion=criterion,
@@ -292,7 +285,6 @@ def __init__(
         )
 
         self.feature_set_ends = feature_set_ends
-        self.apply_max_features_per_feature_set = apply_max_features_per_feature_set
         self._max_features_arr = None
 
     def _build_tree(
@@ -383,7 +375,6 @@ def _build_tree(
 
         if isinstance(self._max_features_arr, (Integral, Real, str, type(None))):
             max_features_arr_ = [self._max_features_arr] * self.n_feature_sets_
-            stratify_mtry_per_view = self.apply_max_features_per_feature_set
         else:
             if not isinstance(self._max_features_arr, (list, np.ndarray)):
                 raise ValueError(
@@ -396,74 +387,53 @@ def _build_tree(
                     f"got {len(self.max_features)}"
                 )
             max_features_arr_ = self._max_features_arr
-            stratify_mtry_per_view = True
 
         self.n_features_in_set_ = []
-        if stratify_mtry_per_view:
-            # XXX: experimental
-            # we can replace max_features_ here based on whether or not uniform logic over
-            # feature sets
-            max_features_per_set = []
-            n_features_in_prev = 0
-            for idx in range(self.n_feature_sets_):
-                max_features = max_features_arr_[idx]
-
-                n_features_in_ = self.feature_set_ends_[idx] - n_features_in_prev
-                n_features_in_prev += n_features_in_
-                self.n_features_in_set_.append(n_features_in_)
-                if isinstance(max_features, str):
-                    if max_features == "sqrt":
-                        max_features = max(1, math.ceil(np.sqrt(n_features_in_)))
-                    elif max_features == "log2":
-                        max_features = max(1, math.ceil(np.log2(n_features_in_)))
-                elif max_features is None:
-                    max_features = n_features_in_
-                elif isinstance(max_features, numbers.Integral):
-                    max_features = max_features
-                else:  # float
-                    if max_features > 0.0:
-                        max_features = max(1, math.ceil(max_features * n_features_in_))
-                    else:
-                        max_features = 0
-
-                if max_features > n_features_in_:
-                    raise ValueError(
-                        f"max_features must be less than or equal to "
-                        f"the number of features in feature set {idx}: {n_features_in_}, but "
-                        f"max_features = {max_features} when applying sampling"
-                        f"per feature set."
-                    )
-
-                max_features_per_set.append(max_features)
-            self.max_features_ = np.sum(max_features_per_set)
-            if self.max_features_ > n_features:
-                raise ValueError(
-                    "max_features is greater than the number of features: "
-                    f"{max_features} > {n_features}."
-                    "This should not be possible. Please submit a bug report."
-                )
-            self.max_features_per_set_ = np.asarray(max_features_per_set, dtype=np.intp)
-            # the total number of features to sample per split
-            self.max_features_ = np.sum(self.max_features_per_set_)
-        else:
-            self.max_features_per_set_ = None
-            self.max_features = self._max_features_arr
-            if isinstance(self.max_features, str):
-                if self.max_features == "sqrt":
-                    max_features = max(1, int(np.sqrt(self.n_features_in_)))
-                elif self.max_features == "log2":
-                    max_features = max(1, int(np.log2(self.n_features_in_)))
-            elif self.max_features is None:
-                max_features = self.n_features_in_
-            elif isinstance(self.max_features, numbers.Integral):
-                max_features = self.max_features
+        # XXX: experimental
+        # resolve max_features separately for every feature set; the per-set values are
+        # summed into max_features_ after the loop
+        max_features_per_set = []
+        n_features_in_prev = 0
+        for idx in range(self.n_feature_sets_):
+            max_features = max_features_arr_[idx]
+
+            n_features_in_ = self.feature_set_ends_[idx] - n_features_in_prev
+            n_features_in_prev += n_features_in_
+            self.n_features_in_set_.append(n_features_in_)
+            if isinstance(max_features, str):
+                if max_features == "sqrt":
+                    max_features = max(1, math.ceil(np.sqrt(n_features_in_)))
+                elif max_features == "log2":
+                    max_features = max(1, math.ceil(np.log2(n_features_in_)))
+            elif max_features is None:
+                max_features = n_features_in_
+            elif isinstance(max_features, numbers.Integral):
+                max_features = max_features
             else:  # float
-                if self.max_features > 0.0:
-                    max_features = max(1, int(self.max_features * self.n_features_in_))
+                if max_features > 0.0:
+                    max_features = max(1, math.ceil(max_features * n_features_in_))
                 else:
                     max_features = 0
 
-            self.max_features_ = max_features
+            if max_features > n_features_in_:
+                raise ValueError(
+                    f"max_features must be less than or equal to "
+                    f"the number of features in feature set {idx}: {n_features_in_}, but "
+                    f"max_features = {max_features} when applying sampling"
+                    f"per feature set."
+                )
+
+            max_features_per_set.append(max_features)
+        self.max_features_ = np.sum(max_features_per_set)
+        if self.max_features_ > n_features:
+            raise ValueError(
+                "max_features is greater than the number of features: "
+                f"{max_features} > {n_features}."
+                "This should not be possible. Please submit a bug report."
+            )
+        self.max_features_per_set_ = np.asarray(max_features_per_set, dtype=np.intp)
+        # the total number of features to sample per split
+        self.max_features_ = np.sum(self.max_features_per_set_)
 
         if not isinstance(self.splitter, ObliqueSplitter):
             splitter = SPLITTERS[self.splitter](
diff --git a/sktree/tree/tests/test_multiview.py b/sktree/tree/tests/test_multiview.py
index 419ca378d..38541197b 100644
--- a/sktree/tree/tests/test_multiview.py
+++ b/sktree/tree/tests/test_multiview.py
@@ -102,7 +102,6 @@ def test_multiview_errors():
         random_state=seed,
         feature_set_ends=[3, 5],
         max_features=6,
-        apply_max_features_per_feature_set=True,
     )
     with pytest.raises(ValueError, match="the number of features in feature set"):
         clf.fit(X, y)
@@ -117,7 +116,6 @@ def test_multiview_separate_feature_set_sampling_sets_attributes():
         random_state=seed,
         feature_set_ends=[6, 10],
         max_features=0.5,
-        apply_max_features_per_feature_set=True,
     )
     clf.fit(X, y)
 
@@ -130,7 +128,6 @@ def test_multiview_separate_feature_set_sampling_sets_attributes():
         random_state=seed,
         feature_set_ends=[9, 13],
         max_features="sqrt",
-        apply_max_features_per_feature_set=True,
     )
     clf.fit(X, y)
     assert_array_equal(clf.max_features_per_set_, [3, 2])
@@ -142,7 +139,6 @@ def test_multiview_separate_feature_set_sampling_sets_attributes():
         random_state=seed,
         feature_set_ends=[5, 9],
         max_features="sqrt",
-        apply_max_features_per_feature_set=True,
     )
     clf.fit(X, y)
     assert_array_equal(clf.max_features_per_set_, [3, 2])
@@ -160,7 +156,6 @@ def test_at_least_one_feature_per_view_is_sampled():
         random_state=seed,
         feature_set_ends=[1, 2, 4, 10],
         max_features=0.4,
-        apply_max_features_per_feature_set=True,
     )
     clf.fit(X, y)
 
@@ -178,7 +173,6 @@ def test_multiview_separate_feature_set_sampling_is_consistent():
         random_state=seed,
         feature_set_ends=[1, 3, 6, 10],
         max_features=[1, 2, 2, 3],
-        apply_max_features_per_feature_set=True,
     )
     clf.fit(X, y)
 
@@ -192,15 +186,13 @@ def test_multiview_separate_feature_set_sampling_is_consistent():
         random_state=seed,
         feature_set_ends=[1, 3, 6, 10],
         max_features=[1, 2, 2, 3],
-        apply_max_features_per_feature_set=False,
     )
     other_clf.fit(X, y)
 
     assert_array_equal(other_clf.tree_.value, clf.tree_.value)
 
 
-@pytest.mark.parametrize("stratify_mtry_per_view", [True, False])
-def test_separate_mtry_per_feature_set(stratify_mtry_per_view):
+def test_separate_mtry_per_feature_set():
     """Test that multiview decision tree can sample different numbers of features per view.
 
     Sets the ``max_feature`` argument as an array-like.
@@ -213,7 +205,6 @@ def test_separate_mtry_per_feature_set(stratify_mtry_per_view):
         random_state=seed,
         feature_set_ends=[1, 2, 4, 10],
         max_features=[0.4, 0.5, 0.6, 0.7],
-        apply_max_features_per_feature_set=stratify_mtry_per_view,
     )
     clf.fit(X, y)
 
@@ -225,7 +216,6 @@ def test_separate_mtry_per_feature_set(stratify_mtry_per_view):
         random_state=seed,
         feature_set_ends=[1, 2, 4, 10],
         max_features=[1, 1, 1, 1.0],
-        apply_max_features_per_feature_set=stratify_mtry_per_view,
     )
     clf.fit(X, y)
     assert_array_equal(clf.max_features_per_set_, [1, 1, 1, 6])
@@ -236,14 +226,9 @@ def test_separate_mtry_per_feature_set(stratify_mtry_per_view):
         random_state=seed,
         feature_set_ends=[1, 2, 4, 10],
         max_features=1.0,
-        apply_max_features_per_feature_set=stratify_mtry_per_view,
     )
     clf.fit(X, y)
-    if stratify_mtry_per_view:
-        assert_array_equal(clf.max_features_per_set_, [1, 1, 2, 6])
-    else:
-        assert clf.max_features_per_set_ is None
-        assert clf.max_features_ == 10
+    assert_array_equal(clf.max_features_per_set_, [1, 1, 2, 6])
     assert clf.max_features_ == 10, np.sum(clf.max_features_per_set_)
 
 
@@ -262,9 +247,10 @@ def test_multiview_without_feature_view_stratification():
         random_state=seed,
         feature_set_ends=[497, 500],
         max_features=0.3,
-        apply_max_features_per_feature_set=False,
     )
     clf.fit(X, y)
 
-    assert clf.max_features_per_set_ is None
-    assert clf.max_features_ == 500 * clf.max_features, clf.max_features_
+    assert_array_equal(clf.max_features_per_set_, [150, 1]), clf.max_features_per_set_
+    assert clf.max_features_ == math.ceil(497.0 * clf.max_features) + math.ceil(
+        3 * clf.max_features
+    )

From 500dca969defaf97c85818d138f587db2e301c15 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Thu, 14 Mar 2024 13:13:57 -0400
Subject: [PATCH 03/26] add changelog

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 doc/whats_new/v0.8.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst
index 0a7ba4f58..183d217df 100644
--- a/doc/whats_new/v0.8.rst
+++ b/doc/whats_new/v0.8.rst
@@ -13,6 +13,12 @@ Version 0.8
 Changelog
 ---------
 
+- |API| :class:`sktree.tree.MultiViewDecisionTreeClassifier` no longer has the
+    ``apply_max_features_per_feature_set`` argument. Instead, the
+    ``max_features`` argument is used to control the number of features to
+    consider when looking for the best split within each feature set explicitly.
+    By `Adam Li`_ :pr:`#247`.
+
 Code and Documentation Contributors
 -----------------------------------
 

From 954c6fc49853e5d50884b46d452604de0ab930f2 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 15 Mar 2024 20:52:16 -0400
Subject: [PATCH 04/26] Fix examples

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 .../plot_MI_imbalanced_hyppo_testing.py       |   1 -
 .../plot_co_MIGHT_alternative.py              |   1 -
 .../hypothesis_testing/plot_co_MIGHT_null.py  |   2 -
 .../plot_multiview_axis_aligned_splitter.py   |   3 -
 sktree/tree/__init__.py                       |   3 +-
 sktree/tree/_multiview.py                     | 468 ++++++++++++++++++
 sktree/tree/_oblique_splitter.pyx             |   2 +
 sktree/tree/tests/test_all_trees.py           |  10 +-
 8 files changed, 479 insertions(+), 11 deletions(-)

diff --git a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
index 95c5341ae..de3473c4d 100644
--- a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
+++ b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
@@ -130,7 +130,6 @@ def make_multiview_classification(
         max_features=max_features,
         tree_estimator=MultiViewDecisionTreeClassifier(
             feature_set_ends=n_features_views,
-            apply_max_features_per_feature_set=True,
         ),
         random_state=seed,
         honest_fraction=0.5,
diff --git a/examples/hypothesis_testing/plot_co_MIGHT_alternative.py b/examples/hypothesis_testing/plot_co_MIGHT_alternative.py
index fd33c335e..97c7b6611 100644
--- a/examples/hypothesis_testing/plot_co_MIGHT_alternative.py
+++ b/examples/hypothesis_testing/plot_co_MIGHT_alternative.py
@@ -112,7 +112,6 @@
         max_features=max_features,
         tree_estimator=MultiViewDecisionTreeClassifier(
             feature_set_ends=n_features_ends,
-            apply_max_features_per_feature_set=True,
         ),
         random_state=seed,
         honest_fraction=0.5,
diff --git a/examples/hypothesis_testing/plot_co_MIGHT_null.py b/examples/hypothesis_testing/plot_co_MIGHT_null.py
index 2e6325cd1..b6f2f9346 100644
--- a/examples/hypothesis_testing/plot_co_MIGHT_null.py
+++ b/examples/hypothesis_testing/plot_co_MIGHT_null.py
@@ -84,7 +84,6 @@
         max_features=max_features,
         tree_estimator=MultiViewDecisionTreeClassifier(
             feature_set_ends=n_features_ends,
-            apply_max_features_per_feature_set=True,
         ),
         random_state=seed,
         honest_fraction=0.5,
@@ -203,7 +202,6 @@
         max_features=max_features,
         tree_estimator=MultiViewDecisionTreeClassifier(
             feature_set_ends=n_features_ends,
-            apply_max_features_per_feature_set=True,
         ),
         random_state=seed,
         honest_fraction=0.5,
diff --git a/examples/splitters/plot_multiview_axis_aligned_splitter.py b/examples/splitters/plot_multiview_axis_aligned_splitter.py
index 064000153..00b8c0280 100644
--- a/examples/splitters/plot_multiview_axis_aligned_splitter.py
+++ b/examples/splitters/plot_multiview_axis_aligned_splitter.py
@@ -127,9 +127,6 @@
 # more than the second feature set, we can specify ``max_features_per_set`` as follows:
 # ``max_features_per_set = [3, 1]``. This will sample from the first feature set three times
 # and the second feature set once.
-#
-# .. note:: In practice, this is controlled by the ``apply_max_features_per_feature_set`` parameter
-#   in :class:`sktree.tree.MultiViewDecisionTreeClassifier`.
 
 max_features_per_set_ = np.array([1, 2, 3], dtype=int)
 max_features = np.sum(max_features_per_set_)
diff --git a/sktree/tree/__init__.py b/sktree/tree/__init__.py
index 797338ac3..dc5465a60 100644
--- a/sktree/tree/__init__.py
+++ b/sktree/tree/__init__.py
@@ -15,7 +15,7 @@
     UnsupervisedObliqueDecisionTree,
 )
 from ._honest_tree import HonestTreeClassifier
-from ._multiview import MultiViewDecisionTreeClassifier
+from ._multiview import MultiViewDecisionTreeClassifier, MultiViewObliqueDecisionTreeClassifier
 from ._neighbors import compute_forest_similarity_matrix
 
 __all__ = [
@@ -34,4 +34,5 @@
     "ExtraTreeClassifier",
     "ExtraTreeRegressor",
     "MultiViewDecisionTreeClassifier",
+    "MultiViewObliqueDecisionTreeClassifier",
 ]
diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index 81aa24963..8be7ed74c 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -35,6 +35,10 @@
     "best": _oblique_splitter.MultiViewSplitter,
 }
 
+OBLIQUE_DENSE_SPLITTERS = {
+    "best": _oblique_splitter.MultiViewObliqueSplitter,
+}
+
 
 class MultiViewDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
     """A multi-view axis-aligned decision tree classifier.
@@ -547,3 +551,467 @@ def _inheritable_fitted_attribute(self):
             "n_features_in_set_",
             "max_features_per_set_",
         ]
+
+
+class MultiViewObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
+    """A multi-view OBLIQUE decision tree classifier.
+
+    This is an experimental feature that applies an oblique decision tree to
+    multiple feature-sets concatenated across columns in ``X``.
+
+    Parameters
+    ----------
+    criterion : {"gini", "entropy"}, default="gini"
+        The function to measure the quality of a split. Supported criteria are
+        "gini" for the Gini impurity and "entropy" for the information gain.
+
+    splitter : {"best"}, default="best"
+        The strategy used to choose the split at each node.
+
+    max_depth : int, default=None
+        The maximum depth of the tree. If None, then nodes are expanded until
+        all leaves are pure or until all leaves contain less than
+        min_samples_split samples.
+
+    min_samples_split : int or float, default=2
+        The minimum number of samples required to split an internal node:
+
+        - If int, then consider `min_samples_split` as the minimum number.
+        - If float, then `min_samples_split` is a fraction and
+          `ceil(min_samples_split * n_samples)` are the minimum
+          number of samples for each split.
+
+    min_samples_leaf : int or float, default=1
+        The minimum number of samples required to be at a leaf node.
+        A split point at any depth will only be considered if it leaves at
+        least ``min_samples_leaf`` training samples in each of the left and
+        right branches.  This may have the effect of smoothing the model,
+        especially in regression.
+
+        - If int, then consider `min_samples_leaf` as the minimum number.
+        - If float, then `min_samples_leaf` is a fraction and
+          `ceil(min_samples_leaf * n_samples)` are the minimum
+          number of samples for each node.
+
+    min_weight_fraction_leaf : float, default=0.0
+        The minimum weighted fraction of the sum total of weights (of all
+        the input samples) required to be at a leaf node. Samples have
+        equal weight when sample_weight is not provided.
+
+    max_features : array-like, int, float or {"auto", "sqrt", "log2"}, default=None
+        The number of features to consider when looking for the best split:
+
+            - If int, then consider `max_features` features at each split.
+            - If float, then `max_features` is a fraction and
+              `max(1, ceil(max_features * n_features))` features are
+              considered at each split.
+            - If "auto", then `max_features=sqrt(n_features)`.
+            - If "sqrt", then `max_features=sqrt(n_features)`.
+            - If "log2", then `max_features=log2(n_features)`.
+            - If None, then `max_features=n_features`.
+
+        If array-like, then `max_features` is the number of features to consider
+        for each feature set following the same logic as above, where
+        ``n_features`` is the number of features in the respective feature set.
+
+        Note: the search for a split does not stop until at least one
+        valid partition of the node samples is found, even if it requires to
+        effectively inspect more than ``max_features`` features.
+
+        Note: Compared to axis-aligned Random Forests, one can set
+        max_features to a number greater than ``n_features``.
+
+    random_state : int, RandomState instance or None, default=None
+        Controls the randomness of the estimator. The features are always
+        randomly permuted at each split, even if ``splitter`` is set to
+        ``"best"``. When ``max_features < n_features``, the algorithm will
+        select ``max_features`` at random at each split before finding the best
+        split among them. But the best found split may vary across different
+        runs, even if ``max_features=n_features``. That is the case, if the
+        improvement of the criterion is identical for several splits and one
+        split has to be selected at random. To obtain a deterministic behaviour
+        during fitting, ``random_state`` has to be fixed to an integer.
+        See :term:`Glossary <random_state>` for details.
+
+    max_leaf_nodes : int, default=None
+        Grow a tree with ``max_leaf_nodes`` in best-first fashion.
+        Best nodes are defined as relative reduction in impurity.
+        If None then unlimited number of leaf nodes.
+
+    min_impurity_decrease : float, default=0.0
+        A node will be split if this split induces a decrease of the impurity
+        greater than or equal to this value.
+
+        The weighted impurity decrease equation is the following::
+
+            N_t / N * (impurity - N_t_R / N_t * right_impurity
+                                - N_t_L / N_t * left_impurity)
+
+        where ``N`` is the total number of samples, ``N_t`` is the number of
+        samples at the current node, ``N_t_L`` is the number of samples in the
+        left child, and ``N_t_R`` is the number of samples in the right child.
+
+        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
+        if ``sample_weight`` is passed.
+
+    class_weight : dict, list of dict or "balanced", default=None
+        Weights associated with classes in the form ``{class_label: weight}``.
+        If None, all classes are supposed to have weight one. For
+        multi-output problems, a list of dicts can be provided in the same
+        order as the columns of y.
+
+        Note that for multioutput (including multilabel) weights should be
+        defined for each class of every column in its own dict. For example,
+        for four-class multilabel classification weights should be
+        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of
+        [{1:1}, {2:5}, {3:1}, {4:1}].
+
+        The "balanced" mode uses the values of y to automatically adjust
+        weights inversely proportional to class frequencies in the input data
+        as ``n_samples / (n_classes * np.bincount(y))``
+
+        For multi-output, the weights of each column of y will be multiplied.
+
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
+
+    ccp_alpha : non-negative float, default=0.0
+        Not used.
+
+    store_leaf_values : bool, default=False
+        Whether to store the leaf values.
+
+    monotonic_cst : array-like of int of shape (n_features), default=None
+        Indicates the monotonicity constraint to enforce on each feature.
+          - 1: monotonic increase
+          - 0: no constraint
+          - -1: monotonic decrease
+
+        Not used.
+
+    feature_set_ends : array-like of int of shape (n_feature_sets,), default=None
+        The indices of the end of each feature set. For example, if the first
+        feature set is the first 10 features, and the second feature set is the
+        next 20 features, then ``feature_set_ends = [10, 30]``. If ``None``,
+        then this will assume that there is only one feature set.
+
+    Attributes
+    ----------
+    classes_ : ndarray of shape (n_classes,) or list of ndarray
+        The classes labels (single output problem),
+        or a list of arrays of class labels (multi-output problem).
+
+    feature_importances_ : ndarray of shape (n_features,)
+        The impurity-based feature importances.
+        The higher, the more important the feature.
+        The importance of a feature is computed as the (normalized)
+        total reduction of the criterion brought by that feature.  It is also
+        known as the Gini importance [4]_.
+
+        Warning: impurity-based feature importances can be misleading for
+        high cardinality features (many unique values). See
+        :func:`sklearn.inspection.permutation_importance` as an alternative.
+
+    max_features_ : int
+        The inferred value of max_features.
+
+    n_classes_ : int or list of int
+        The number of classes (for single output problems),
+        or a list containing the number of classes for each
+        output (for multi-output problems).
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+    n_outputs_ : int
+        The number of outputs when ``fit`` is performed.
+
+    tree_ : Tree instance
+        The underlying Tree object. Please refer to
+        ``help(sklearn.tree._tree.Tree)`` for
+        attributes of Tree object.
+
+    feature_set_ends_ : array-like of int of shape (n_feature_sets,)
+        The indices of the end of each feature set.
+
+    n_feature_sets_ : int
+        The number of feature sets.
+
+    max_features_per_set_ : array-like of int of shape (n_feature_sets,)
+        The number of features to sample per feature set; the entries sum
+        to ``max_features_``.
+
+    See Also
+    --------
+    sklearn.tree.DecisionTreeClassifier : An axis-aligned decision tree classifier.
+    """
+
+    tree_type = "oblique"
+
+    _parameter_constraints = {
+        **DecisionTreeClassifier._parameter_constraints,
+        "feature_set_ends": ["array-like", None],
+        "feature_combinations": [
+            Interval(Real, 1.0, None, closed="left"),
+            None,
+        ],
+    }
+    _parameter_constraints.pop("max_features")
+    _parameter_constraints["max_features"] = [
+        Interval(Integral, 1, None, closed="left"),
+        Interval(RealNotInt, 0.0, 1.0, closed="right"),
+        StrOptions({"sqrt", "log2"}),
+        "array-like",
+        None,
+    ]
+
+    def __init__(
+        self,
+        *,
+        criterion="gini",
+        splitter="best",
+        max_depth=None,
+        min_samples_split=2,
+        min_samples_leaf=1,
+        min_weight_fraction_leaf=0.0,
+        max_features=None,
+        random_state=None,
+        max_leaf_nodes=None,
+        min_impurity_decrease=0.0,
+        class_weight=None,
+        ccp_alpha=0.0,
+        store_leaf_values=False,
+        monotonic_cst=None,
+        feature_set_ends=None,
+        feature_combinations=None,
+    ):
+        super().__init__(
+            criterion=criterion,
+            splitter=splitter,
+            max_depth=max_depth,
+            min_samples_split=min_samples_split,
+            min_samples_leaf=min_samples_leaf,
+            min_weight_fraction_leaf=min_weight_fraction_leaf,
+            max_features=max_features,
+            max_leaf_nodes=max_leaf_nodes,
+            class_weight=class_weight,
+            random_state=random_state,
+            min_impurity_decrease=min_impurity_decrease,
+            ccp_alpha=ccp_alpha,
+            store_leaf_values=store_leaf_values,
+            monotonic_cst=monotonic_cst,
+        )
+
+        self.feature_set_ends = feature_set_ends
+        self.feature_combinations = feature_combinations
+        self._max_features_arr = None
+
+    def _build_tree(
+        self,
+        X,
+        y,
+        sample_weight,
+        missing_values_in_feature_mask,
+        min_samples_leaf,
+        min_weight_leaf,
+        max_leaf_nodes,
+        min_samples_split,
+        max_depth,
+        random_state,
+    ):
+        """Build the actual tree.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            The training input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csc_matrix``.
+
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+            The target values (class labels) as integers or strings.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights. If None, then samples are equally weighted. Splits
+            that would create child nodes with net zero or negative weight are
+            ignored while searching for a split in each node. Splits are also
+            ignored if they would result in any single class carrying a
+            negative weight in either child node.
+
+        min_samples_leaf : int or float
+            The minimum number of samples required to be at a leaf node.
+
+        min_weight_leaf : float, default=0.0
+           The minimum weighted fraction of the sum total of weights.
+
+        max_leaf_nodes : int, default=None
+            Grow a tree with ``max_leaf_nodes`` in best-first fashion.
+
+        min_samples_split : int or float, default=2
+            The minimum number of samples required to split an internal node.
+
+        max_depth : int, default=None
+            The maximum depth of the tree. If None, then nodes are expanded until
+            all leaves are pure or until all leaves contain less than
+            min_samples_split samples.
+
+        random_state : int, RandomState instance or None, default=None
+            Controls the randomness of the estimator.
+        """
+        monotonic_cst = None
+        _, n_features = X.shape
+
+        self.feature_combinations_ = (
+            self.feature_combinations if self.feature_combinations is not None else 1.5
+        )
+
+        # Build tree
+        criterion = self.criterion
+        if not isinstance(criterion, BaseCriterion):
+            criterion = CRITERIA_CLF[self.criterion](self.n_outputs_, self.n_classes_)
+        else:
+            # Make a deepcopy in case the criterion has mutable attributes that
+            # might be shared and modified concurrently during parallel fitting
+            criterion = copy.deepcopy(criterion)
+
+        if self.feature_set_ends is None:
+            self.feature_set_ends_ = np.asarray([n_features], dtype=np.intp)
+        else:
+            self.feature_set_ends_ = np.atleast_1d(self.feature_set_ends).astype(np.intp)
+        self.n_feature_sets_ = len(self.feature_set_ends_)
+        if self.feature_set_ends_[-1] != n_features:
+            raise ValueError(
+                f"The last feature set end must be equal to the number of features, "
+                f"{n_features}, but got {self.feature_set_ends_[-1]}."
+            )
+
+        splitter = self.splitter
+        if issparse(X):
+            raise ValueError(
+                "Sparse input is not supported for oblique trees. "
+                "Please convert your data to a dense array."
+            )
+
+        if isinstance(self._max_features_arr, (Integral, Real, str, type(None))):
+            max_features_arr_ = [self._max_features_arr] * self.n_feature_sets_
+        else:
+            if not isinstance(self._max_features_arr, (list, np.ndarray)):
+                raise ValueError(
+                    f"max_features must be an array-like, int, float, str, or None; "
+                    f"got {type(self._max_features_arr)}"
+                )
+            if len(self._max_features_arr) != self.n_feature_sets_:
+                raise ValueError(
+                    f"max_features must be an array-like of length {self.n_feature_sets_}; "
+                    f"got {len(self.max_features)}"
+                )
+            max_features_arr_ = self._max_features_arr
+
+        self.n_features_in_set_ = []
+        # XXX: experimental
+        # we can replace max_features_ here based on whether or not uniform logic over
+        # feature sets
+        max_features_per_set = []
+        n_features_in_prev = 0
+        for idx in range(self.n_feature_sets_):
+            max_features = max_features_arr_[idx]
+
+            n_features_in_ = self.feature_set_ends_[idx] - n_features_in_prev
+            n_features_in_prev += n_features_in_
+            self.n_features_in_set_.append(n_features_in_)
+            if isinstance(max_features, str):
+                if max_features == "sqrt":
+                    max_features = max(1, math.ceil(np.sqrt(n_features_in_)))
+                elif max_features == "log2":
+                    max_features = max(1, math.ceil(np.log2(n_features_in_)))
+            elif max_features is None:
+                max_features = n_features_in_
+            elif isinstance(max_features, numbers.Integral):
+                max_features = max_features
+            else:  # float
+                if max_features > 0.0:
+                    max_features = max(1, math.ceil(max_features * n_features_in_))
+                else:
+                    max_features = 0
+
+            if max_features > n_features_in_:
+                raise ValueError(
+                    f"max_features must be less than or equal to "
+                    f"the number of features in feature set {idx}: {n_features_in_}, but "
+                    f"max_features = {max_features} when applying sampling"
+                    f"per feature set."
+                )
+
+            max_features_per_set.append(max_features)
+        self.max_features_ = np.sum(max_features_per_set)
+        if self.max_features_ > n_features:
+            raise ValueError(
+                "max_features is greater than the number of features: "
+                f"{max_features} > {n_features}."
+                "This should not be possible. Please submit a bug report."
+            )
+        self.max_features_per_set_ = np.asarray(max_features_per_set, dtype=np.intp)
+        # the total number of features to sample per split
+        self.max_features_ = np.sum(self.max_features_per_set_)
+
+        if not isinstance(self.splitter, ObliqueSplitter):
+            splitter = OBLIQUE_DENSE_SPLITTERS[self.splitter](
+                criterion,
+                self.max_features_,
+                min_samples_leaf,
+                min_weight_leaf,
+                random_state,
+                monotonic_cst,
+                self.feature_combinations_,
+                self.feature_set_ends_,
+                self.n_feature_sets_,
+                self.max_features_per_set_,
+            )
+
+        self.tree_ = ObliqueTree(self.n_features_in_, self.n_classes_, self.n_outputs_)
+
+        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
+        if max_leaf_nodes < 0:
+            self.builder_ = DepthFirstTreeBuilder(
+                splitter,
+                min_samples_split,
+                min_samples_leaf,
+                min_weight_leaf,
+                max_depth,
+                self.min_impurity_decrease,
+            )
+        else:
+            self.builder_ = BestFirstTreeBuilder(
+                splitter,
+                min_samples_split,
+                min_samples_leaf,
+                min_weight_leaf,
+                max_depth,
+                max_leaf_nodes,
+                self.min_impurity_decrease,
+            )
+
+        self.builder_.build(self.tree_, X, y, sample_weight, None)
+
+        if self.n_outputs_ == 1:
+            self.n_classes_ = self.n_classes_[0]
+            self.classes_ = self.classes_[0]
+
+    @property
+    def _inheritable_fitted_attribute(self):
+        """Define additional attributes to pass onto a parent meta tree-estimator.
+
+        Used for passing parameters to HonestTreeClassifier.
+        """
+        return [
+            "max_features_",
+            "feature_set_ends_",
+            "n_feature_sets_",
+            "n_features_in_set_",
+            "max_features_per_set_",
+            "feature_combinations_",
+        ]
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index e6d978658..a3b9c6771 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -789,6 +789,8 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
                     break
 
 
+# TODO: need to check segfault for multiview oblique splitter
+# REBUILD WITH BOUNDS CHECK
 cdef class MultiViewObliqueSplitter(MultiViewSplitter):
     def __cinit__(
         self,
diff --git a/sktree/tree/tests/test_all_trees.py b/sktree/tree/tests/test_all_trees.py
index 66a9ea307..c5a7708b5 100644
--- a/sktree/tree/tests/test_all_trees.py
+++ b/sktree/tree/tests/test_all_trees.py
@@ -2,13 +2,15 @@
 import numpy as np
 import pytest
 from numpy.testing import assert_almost_equal, assert_array_equal
-from sklearn.base import is_classifier
+from sklearn.base import is_classifier, is_regressor
 from sklearn.datasets import make_blobs
 from sklearn.tree._tree import TREE_LEAF
 
 from sktree.tree import (
     ExtraObliqueDecisionTreeClassifier,
     ExtraObliqueDecisionTreeRegressor,
+    MultiViewDecisionTreeClassifier,
+    MultiViewObliqueDecisionTreeClassifier,
     ObliqueDecisionTreeClassifier,
     ObliqueDecisionTreeRegressor,
     PatchObliqueDecisionTreeClassifier,
@@ -26,6 +28,8 @@
     PatchObliqueDecisionTreeClassifier,
     UnsupervisedDecisionTree,
     UnsupervisedObliqueDecisionTree,
+    MultiViewDecisionTreeClassifier,
+    MultiViewObliqueDecisionTreeClassifier,
 ]
 
 
@@ -121,7 +125,7 @@ def assert_tree_equal(d, s, message):
 
 @pytest.mark.parametrize(
     "TREE",
-    [ObliqueDecisionTreeClassifier, UnsupervisedDecisionTree, UnsupervisedObliqueDecisionTree],
+    ALL_TREES,
 )
 def test_tree_deserialization_from_read_only_buffer(tmpdir, TREE):
     """Check that Trees can be deserialized with read only buffers.
@@ -131,7 +135,7 @@ def test_tree_deserialization_from_read_only_buffer(tmpdir, TREE):
     pickle_path = str(tmpdir.join("clf.joblib"))
     clf = TREE(random_state=0)
 
-    if is_classifier(TREE):
+    if is_classifier(TREE) or is_regressor(TREE):
         clf.fit(X_small, y_small)
     else:
         clf.fit(X_small)

From e6ea30bef2966faffb97edb5721790a98746a327 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Mon, 24 Jun 2024 17:23:16 -0400
Subject: [PATCH 05/26] Almost working

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/tree/_oblique_splitter.pyx | 32 ++++++++++++++++++-------------
 test_mvoblique_tree.py            |  1 +
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index f9e5a8edd..8dd6ea704 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -331,6 +331,9 @@ cdef class BestObliqueSplitter(ObliqueSplitter):
         # Sample the projection matrix
         self.sample_proj_mat(self.proj_mat_weights, self.proj_mat_indices)
 
+        with gil:
+            print("Finished sampling projection matrix")
+
         # For every vector in the projection matrix
         for feat_i in range(max_features):
             # Projection vector has no nonzeros
@@ -724,15 +727,18 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         # create a helper array for allowing efficient Fisher-Yates
         self.multi_indices_to_sample = vector[vector[intp_t]](self.n_feature_sets)
 
+        # create a helper array for allowing efficient Fisher-Yates
         cdef intp_t i_feature = 0
         cdef intp_t feature_set_begin = 0
-        cdef intp_t size_of_feature_set
+        cdef intp_t size_of_feature_set, size_of_sampling
         cdef intp_t ifeat = 0
         for i_feature in range(self.n_feature_sets):
             size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin
-            for ifeat in range(size_of_feature_set):
-                self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin)
+            size_of_sampling = self.max_features_per_set[i_feature] * size_of_feature_set
 
+            # push an index corresponding to each element we want to sample
+            for ifeat in range(size_of_sampling):
+                self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin)
             feature_set_begin = self.feature_set_ends[i_feature]
         return 0
 
@@ -826,8 +832,6 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
         for i in range(self.n_feature_sets):
             n_non_zeros_per_set[i] = <intp_t> (self.max_features_per_set[i] * self.feature_combinations)
         self.n_non_zeros_per_set = n_non_zeros_per_set
-        with gil:
-            print("Initialized")
 
     cdef void sample_proj_mat(
         self,
@@ -850,19 +854,14 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
         cdef intp_t grid_size
 
         # keep track of the beginning and ending indices of each feature set
-        cdef intp_t feature_set_begin, feature_set_end, idx
-        feature_set_begin = 0
+        cdef intp_t idx
 
         # 02: Algorithm samples feature combinations from each feature set uniformly and evaluates
         # them independently.
-        feature_set_begin = 0
-
         with gil:
-            print("Starting to sample projection matrix")
+            print("Starting to sample projection matrix", self.n_feature_sets)
         # sample from a feature set using linear combinations among the two sets
         for idx in range(self.n_feature_sets):
-            feature_set_end = self.feature_set_ends[idx]
-
             # indices to sample is a 1D-index array of size (max_features * n_features_in_set)
             # which is Fisher-Yates shuffled to sample random features in each feature set
             indices_to_sample = self.multi_indices_to_sample[idx]
@@ -874,6 +873,11 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
                 indices_to_sample[j], indices_to_sample[i] = \
                     indices_to_sample[i], indices_to_sample[j]
 
+            with gil:
+                print(idx, "Finished fisher yates...")
+                print(len(self.n_non_zeros_per_set), len(self.max_features_per_set), len(self.multi_indices_to_sample))
+                print(len(indices_to_sample), grid_size, self.n_non_zeros_per_set[idx])
+
             # we want "n_non_zeros / K" for this feature set over K feature sets
             for i in range(0, self.n_non_zeros_per_set[idx]):
                 # get the next index from the shuffled index array
@@ -887,11 +891,13 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
                 # sample a random weight
                 weight = 1 if (rand_int(0, 2, random_state) == 1) else -1
 
+                # with gil:
+                #     print(i, proj_i, feat_i)
                 proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
                 proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
 
             # the new beginning is the previous end
-            feature_set_begin = feature_set_end
+            # feature_set_begin = feature_set_end
 
 
 cdef class MultiViewSplitterTester(MultiViewSplitter):
diff --git a/test_mvoblique_tree.py b/test_mvoblique_tree.py
index 02c81d395..41fd7c357 100644
--- a/test_mvoblique_tree.py
+++ b/test_mvoblique_tree.py
@@ -52,6 +52,7 @@
     feature_set_ends=[n_features_1, X.shape[1]],
     max_features=0.3,
 )
+print(X.shape)
 clf.fit(X, y)
 assert (
     accuracy_score(y, clf.predict(X)) == 1.0

From 121868020f402bca68c588a458827a1882594bcf Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Mon, 24 Jun 2024 17:41:04 -0400
Subject: [PATCH 06/26] Fix changelog

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 doc/whats_new/v0.8.rst | 5 -----
 doc/whats_new/v0.9.rst | 6 +++++-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst
index 521c80ecd..a0949489d 100644
--- a/doc/whats_new/v0.8.rst
+++ b/doc/whats_new/v0.8.rst
@@ -32,11 +32,6 @@ Changelog
     estimated on oob samples were biased when there was a low number of samples
     due to imbalance in the classes when ``bootstrap=True``.
     By `Adam Li`_ (:pr:`#283`)
-- |API| :class:`sktree.tree.MultiViewDecisionTreeClassifier` do not have the
-    ``apply_max_features_per_feature_set`` argument anymore. Instead, the
-    ``max_features`` argument is used to control the number of features to
-    consider when looking for the best split within each feature set explicitly.
-    By `Adam Li`_ :pr:`#247`.
 
 Code and Documentation Contributors
 -----------------------------------
diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst
index 9c5ffb3b2..696929b5e 100644
--- a/doc/whats_new/v0.9.rst
+++ b/doc/whats_new/v0.9.rst
@@ -13,7 +13,11 @@ Version 0.9
 Changelog
 ---------
 
-- 
+- |API| :class:`sktree.tree.MultiViewDecisionTreeClassifier` does not have the
+    ``apply_max_features_per_feature_set`` argument anymore. Instead, the
+    ``max_features`` argument is used to control the number of features to
+    consider when looking for the best split within each feature set explicitly.
+    By `Adam Li`_ :pr:`#247`.
 
 Code and Documentation Contributors
 -----------------------------------

From bba2e8fb9916d61de40bf0f1645b66c4fcf454e9 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Wed, 26 Jun 2024 08:59:53 -0400
Subject: [PATCH 07/26] Update submodule

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/_lib/sklearn_fork            |  2 +-
 sktree/tree/_oblique_splitter.pxd   | 12 ++--
 sktree/tree/_oblique_splitter.pyx   | 30 ++++-----
 sktree/tree/tests/test_all_trees.py |  5 +-
 test_mvoblique_tree.py              | 98 +++++++++++++++++------------
 5 files changed, 87 insertions(+), 60 deletions(-)

diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork
index 74b2e699a..d455aa16e 160000
--- a/sktree/_lib/sklearn_fork
+++ b/sktree/_lib/sklearn_fork
@@ -1 +1 @@
-Subproject commit 74b2e699a2607b190ce6fc49b7625231023989c0
+Subproject commit d455aa16ee9cc42ce342dd07d9b94db117783fcc
diff --git a/sktree/tree/_oblique_splitter.pxd b/sktree/tree/_oblique_splitter.pxd
index a7a2dcfd7..fa83d7416 100644
--- a/sktree/tree/_oblique_splitter.pxd
+++ b/sktree/tree/_oblique_splitter.pxd
@@ -95,9 +95,13 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
     # to split the samples samples[start:end].
 
     # Oblique Splitting extra parameters
-    cdef public float64_t feature_combinations             # Number of features to combine
+    cdef public float64_t feature_combinations          # Number of features to combine
     cdef intp_t n_non_zeros                             # Number of non-zero features
-    cdef intp_t[::1] indices_to_sample                  # an array of indices to sample of size mtry X n_features
+    cdef intp_t[::1] indices_to_sample                  # An array of indices to sample of size mtry X n_features
+    #                                                   # to sample from that produces a non-zero feature combination.
+    #                                                   # This array is multiplied by the data matrix n_samples X n_features
+    #                                                   # to produce a non-zero feature combination of size
+    #                                                   # n_samples X mtry.
 
     # All oblique splitters (i.e. non-axis aligned splitters) require a
     # function to sample a projection matrix that is applied to the feature matrix
@@ -139,10 +143,10 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):
 
 # XXX: This splitter is experimental. Expect changes frequently.
 cdef class MultiViewSplitter(BestObliqueSplitter):
-    cdef const intp_t[:] feature_set_ends   # an array indicating the column indices of the end of each feature set
+    cdef const intp_t[:] feature_set_ends       # an array indicating the column indices of the end of each feature set
     cdef intp_t n_feature_sets                  # the number of feature sets is the length of feature_set_ends + 1
 
-    cdef const intp_t[:] max_features_per_set  # the maximum number of features to sample from each feature set
+    cdef const intp_t[:] max_features_per_set   # the maximum number of features to sample from each feature set
 
     cdef vector[vector[intp_t]] multi_indices_to_sample
 
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index 8dd6ea704..4e4f0860c 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -331,8 +331,8 @@ cdef class BestObliqueSplitter(ObliqueSplitter):
         # Sample the projection matrix
         self.sample_proj_mat(self.proj_mat_weights, self.proj_mat_indices)
 
-        with gil:
-            print("Finished sampling projection matrix")
+        # with gil:
+        #     print("Finished sampling projection matrix")
 
         # For every vector in the projection matrix
         for feat_i in range(max_features):
@@ -733,12 +733,15 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         cdef intp_t size_of_feature_set, size_of_sampling
         cdef intp_t ifeat = 0
         for i_feature in range(self.n_feature_sets):
+            # n_features * max_features_per_set
             size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin
             size_of_sampling = self.max_features_per_set[i_feature] * size_of_feature_set
 
             # push an index corresponding to each element we want to sample
             for ifeat in range(size_of_sampling):
                 self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin)
+
+                print(i_feature, ifeat + feature_set_begin)
             feature_set_begin = self.feature_set_ends[i_feature]
         return 0
 
@@ -791,9 +794,15 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
                     # here, axis-aligned splits are entirely weights of 1
                     weight = 1  # if (rand_int(0, 2, random_state) == 1) else -1
 
-                    proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
+                    proj_mat_indices[proj_i].push_back(feat_i)  # Store vectorized index of nonzero
                     proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
 
+                    # XXX: debug only
+                    if feat_i > self.n_features:
+                        with gil:
+                            print(idx, ifeature, proj_i, self.n_samples, self.n_features, feat_i)
+
+                    # break early if we've sampled enough features
                     proj_i += 1
                     if proj_i >= self.max_features:
                         break
@@ -858,8 +867,6 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
 
         # 02: Algorithm samples feature combinations from each feature set uniformly and evaluates
         # them independently.
-        with gil:
-            print("Starting to sample projection matrix", self.n_feature_sets)
         # sample from a feature set using linear combinations among the two sets
         for idx in range(self.n_feature_sets):
             # indices to sample is a 1D-index array of size (max_features * n_features_in_set)
@@ -873,10 +880,10 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
                 indices_to_sample[j], indices_to_sample[i] = \
                     indices_to_sample[i], indices_to_sample[j]
 
-            with gil:
-                print(idx, "Finished fisher yates...")
-                print(len(self.n_non_zeros_per_set), len(self.max_features_per_set), len(self.multi_indices_to_sample))
-                print(len(indices_to_sample), grid_size, self.n_non_zeros_per_set[idx])
+            # with gil:
+            #     print(idx, "Finished fisher yates...")
+            #     print(len(self.n_non_zeros_per_set), len(self.max_features_per_set), len(self.multi_indices_to_sample))
+            #     print(len(indices_to_sample), grid_size, self.n_non_zeros_per_set[idx])
 
             # we want "n_non_zeros / K" for this feature set over K feature sets
             for i in range(0, self.n_non_zeros_per_set[idx]):
@@ -891,14 +898,9 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
                 # sample a random weight
                 weight = 1 if (rand_int(0, 2, random_state) == 1) else -1
 
-                # with gil:
-                #     print(i, proj_i, feat_i)
                 proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
                 proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
 
-            # the new beginning is the previous end
-            # feature_set_begin = feature_set_end
-
 
 cdef class MultiViewSplitterTester(MultiViewSplitter):
     """A class to expose a Python interface for testing."""
diff --git a/sktree/tree/tests/test_all_trees.py b/sktree/tree/tests/test_all_trees.py
index 6d06e0fcd..f161cc57b 100644
--- a/sktree/tree/tests/test_all_trees.py
+++ b/sktree/tree/tests/test_all_trees.py
@@ -10,6 +10,7 @@
     ExtraObliqueDecisionTreeClassifier,
     ExtraObliqueDecisionTreeRegressor,
     MultiViewDecisionTreeClassifier,
+    MultiViewObliqueDecisionTreeClassifier,
     ObliqueDecisionTreeClassifier,
     ObliqueDecisionTreeRegressor,
     PatchObliqueDecisionTreeClassifier,
@@ -28,7 +29,7 @@
     UnsupervisedDecisionTree,
     UnsupervisedObliqueDecisionTree,
     MultiViewDecisionTreeClassifier,
-    # MultiViewObliqueDecisionTreeClassifier,
+    MultiViewObliqueDecisionTreeClassifier,
 ]
 
 
@@ -122,6 +123,7 @@ def assert_tree_equal(d, s, message):
 ]
 
 
+@pytest.mark.skip()
 @pytest.mark.parametrize(
     "TREE",
     ALL_TREES,
@@ -135,6 +137,7 @@ def test_tree_deserialization_from_read_only_buffer(tmpdir, TREE):
     clf = TREE(random_state=0)
 
     if is_classifier(TREE) or is_regressor(TREE):
+        print(X_small.shape)
         clf.fit(X_small, y_small)
     else:
         clf.fit(X_small)
diff --git a/test_mvoblique_tree.py b/test_mvoblique_tree.py
index 41fd7c357..c72aaba1b 100644
--- a/test_mvoblique_tree.py
+++ b/test_mvoblique_tree.py
@@ -18,45 +18,63 @@
 
 rng = np.random.default_rng(seed=seed)
 
-n_samples = 20
-n_features_1 = 5
-n_features_2 = 1000
-cluster_std = 5.0
-
-# Create a high-dimensional multiview dataset with a low-dimensional informative
-# subspace in one view of the dataset.
-X0_first, y0 = make_blobs(
-    n_samples=n_samples,
-    cluster_std=cluster_std,
-    n_features=n_features_1,
-    random_state=rng.integers(1, 10000),
-    centers=1,
-)
 
-X1_first, y1 = make_blobs(
-    n_samples=n_samples,
-    cluster_std=cluster_std,
-    n_features=n_features_1,
-    random_state=rng.integers(1, 10000),
-    centers=1,
-)
-y1[:] = 1
-X0 = np.concatenate([X0_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1)
-X1 = np.concatenate([X1_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1)
-X = np.vstack((X0, X1))
-y = np.hstack((y0, y1)).T
-
-# Compare multiview decision tree vs single-view decision tree
-clf = MultiViewObliqueDecisionTreeClassifier(
-    random_state=seed,
-    feature_set_ends=[n_features_1, X.shape[1]],
-    max_features=0.3,
+X_small = np.array(
+    [
+        [0, 0, 4, 0, 0, 0, 1, -14, 0, -4, 0, 0, 0, 0],
+        [0, 0, 5, 3, 0, -4, 0, 0, 1, -5, 0.2, 0, 4, 1],
+        [-1, -1, 0, 0, -4.5, 0, 0, 2.1, 1, 0, 0, -4.5, 0, 1],
+        [-1, -1, 0, -1.2, 0, 0, 0, 0, 0, 0, 0.2, 0, 0, 1],
+        [-1, -1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1],
+        [-1, -2, 0, 4, -3, 10, 4, 0, -3.2, 0, 4, 3, -4, 1],
+        [2.11, 0, -6, -0.5, 0, 11, 0, 0, -3.2, 6, 0.5, 0, -3, 1],
+        [2.11, 0, -6, -0.5, 0, 11, 0, 0, -3.2, 6, 0, 0, -2, 1],
+        [2.11, 8, -6, -0.5, 0, 11, 0, 0, -3.2, 6, 0, 0, -2, 1],
+        [2.11, 8, -6, -0.5, 0, 11, 0, 0, -3.2, 6, 0.5, 0, -1, 0],
+        [2, 8, 5, 1, 0.5, -4, 10, 0, 1, -5, 3, 0, 2, 0],
+        [2, 0, 1, 1, 1, -1, 1, 0, 0, -2, 3, 0, 1, 0],
+        [2, 0, 1, 2, 3, -1, 10, 2, 0, -1, 1, 2, 2, 0],
+        [1, 1, 0, 2, 2, -1, 1, 2, 0, -5, 1, 2, 3, 0],
+        [3, 1, 0, 3, 0, -4, 10, 0, 1, -5, 3, 0, 3, 1],
+        [2.11, 8, -6, -0.5, 0, 1, 0, 0, -3.2, 6, 0.5, 0, -3, 1],
+        [2.11, 8, -6, -0.5, 0, 1, 0, 0, -3.2, 6, 1.5, 1, -1, -1],
+        [2.11, 8, -6, -0.5, 0, 10, 0, 0, -3.2, 6, 0.5, 0, -1, -1],
+        [2, 0, 5, 1, 0.5, -2, 10, 0, 1, -5, 3, 1, 0, -1],
+        [2, 0, 1, 1, 1, -2, 1, 0, 0, -2, 0, 0, 0, 1],
+        [2, 1, 1, 1, 2, -1, 10, 2, 0, -1, 0, 2, 1, 1],
+        [1, 1, 0, 0, 1, -3, 1, 2, 0, -5, 1, 2, 1, 1],
+        [3, 1, 0, 1, 0, -4, 1, 0, 1, -2, 0, 0, 1, 0],
+    ]
 )
-print(X.shape)
-clf.fit(X, y)
-assert (
-    accuracy_score(y, clf.predict(X)) == 1.0
-), f"Accuracy score: {accuracy_score(y, clf.predict(X))}"
-assert (
-    cross_val_score(clf, X, y, cv=5).mean() > 0.9
-), f"CV score: {cross_val_score(clf, X, y, cv=5).mean()}"
+
+y_small = [1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
+y_small_reg = [
+    1.0,
+    2.1,
+    1.2,
+    0.05,
+    10,
+    2.4,
+    3.1,
+    1.01,
+    0.01,
+    2.98,
+    3.1,
+    1.1,
+    0.0,
+    1.2,
+    2,
+    11,
+    0,
+    0,
+    4.5,
+    0.201,
+    1.06,
+    0.9,
+    0,
+]
+
+clf = MultiViewDecisionTreeClassifier(random_state=0)
+
+print(X_small.shape)
+clf.fit(X_small, y_small)

From 01cab41c09c23842ad42226c2d8dca8d0373244a Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Wed, 3 Jul 2024 17:15:17 -0400
Subject: [PATCH 08/26] WIP

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 benchmarks_nonasv/bench_forestht.py |  2 +
 sktree/stats/forestht.py            |  1 +
 sktree/tree/_multiview.py           |  1 +
 sktree/tree/_oblique_splitter.pxd   | 10 +++-
 sktree/tree/_oblique_splitter.pyx   | 69 +++++++++++++++------
 sktree/tree/tests/test_multiview.py |  2 +
 test_mvoblique_tree.py              | 93 +++++++++++------------------
 7 files changed, 99 insertions(+), 79 deletions(-)

diff --git a/benchmarks_nonasv/bench_forestht.py b/benchmarks_nonasv/bench_forestht.py
index 59e4dff9b..2bf0e6926 100644
--- a/benchmarks_nonasv/bench_forestht.py
+++ b/benchmarks_nonasv/bench_forestht.py
@@ -13,6 +13,8 @@
 import seaborn as sns
 from scipy.special import expit
 
+# using an outdated API, but the code could get refactored to use our new API
+# build_coleman_forest, build_oob_forest, etc.
 from sktree.stats import PermutationForestClassifier, PermutationForestRegressor
 
 seed = 12345
diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index b71081806..163341303 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -140,6 +140,7 @@ def build_coleman_forest(
 
     if y.ndim == 1:
         y = y.reshape(-1, 1)
+
     metric_star, metric_star_pi = _compute_null_distribution_coleman(
         y,
         orig_forest_proba,
diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index 6ac4ad494..3934d3e31 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -441,6 +441,7 @@ def _build_tree(
         # the total number of features to sample per split
         self.max_features_ = np.sum(self.max_features_per_set_)
 
+        print(self.max_features_, self.max_features_per_set_, self.feature_set_ends_, self.n_features_in_set_)
         if not isinstance(self.splitter, ObliqueSplitter):
             splitter = SPLITTERS[self.splitter](
                 criterion,
diff --git a/sktree/tree/_oblique_splitter.pxd b/sktree/tree/_oblique_splitter.pxd
index fa83d7416..9f6df2d3d 100644
--- a/sktree/tree/_oblique_splitter.pxd
+++ b/sktree/tree/_oblique_splitter.pxd
@@ -96,8 +96,11 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
 
     # Oblique Splitting extra parameters
     cdef public float64_t feature_combinations          # Number of features to combine
-    cdef intp_t n_non_zeros                             # Number of non-zero features
-    cdef intp_t[::1] indices_to_sample                  # An array of indices to sample of size mtry X n_features
+    cdef intp_t n_non_zeros                             # Number of non-zero features to sample per projection matrix
+
+    # Oblique Splitting extra parameters (mtry, n_dims) matrix
+    # This will contain indices 0 to mtry*n_features to allow efficient shuffling.
+    cdef intp_t[::1] indices_to_sample                  # A flattened 1D array of indices to sample of size mtry X n_features
     #                                                   # to sample from that produces a non-zero feature combination.
     #                                                   # This array is multiplied by the data matrix n_samples X n_features
     #                                                   # to produce a non-zero feature combination of size
@@ -148,6 +151,9 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
 
     cdef const intp_t[:] max_features_per_set   # the maximum number of features to sample from each feature set
 
+    # Each feature set has a different set of indices to sample from with a potentially different
+    # max_features argument. This is a 2D array of indices to sample of size mtry_in_set X features_in_set
+    # to sample from that produces a non-zero feature combination for each feature set.
     cdef vector[vector[intp_t]] multi_indices_to_sample
 
     cdef void sample_proj_mat(
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index 4e4f0860c..aee8e2881 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -8,6 +8,7 @@ import numpy as np
 
 from cython.operator cimport dereference as deref
 from libcpp.vector cimport vector
+from libcpp.algorithm cimport swap
 
 from .._lib.sklearn.tree._criterion cimport Criterion
 from .._lib.sklearn.tree._utils cimport rand_int, rand_uniform
@@ -132,6 +133,17 @@ cdef class BaseObliqueSplitter(Splitter):
         intp_t grid_size,
         uint32_t* random_state,
     ) noexcept nogil:
+        """Fisher-Yates shuffle for a 1D memoryview of indices.
+        
+        Parameters
+        ----------
+        indices_to_sample : memoryview of intp_t
+            The memoryview of indices to shuffle.
+        grid_size : intp_t
+            The number of times to shuffle the array.
+        random_state : uint32_t*
+            The random state to use for pseudo-randomness.
+        """
         cdef intp_t i, j
 
         # XXX: should this be `i` or `i+1`? for valid Fisher-Yates?
@@ -254,7 +266,7 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
 
         # construct an array to sample from mTry x n_features set of indices
         cdef intp_t[::1] indices_to_sample = self.indices_to_sample
-        cdef intp_t grid_size = self.max_features * self.n_features
+        cdef intp_t grid_size = len(indices_to_sample)
 
         # shuffle indices over the 2D grid to sample using Fisher-Yates
         self.fisher_yates_shuffle_memview(indices_to_sample, grid_size, random_state)
@@ -331,8 +343,8 @@ cdef class BestObliqueSplitter(ObliqueSplitter):
         # Sample the projection matrix
         self.sample_proj_mat(self.proj_mat_weights, self.proj_mat_indices)
 
-        # with gil:
-        #     print("Finished sampling projection matrix")
+        with gil:
+            print("Finished sampling projection matrix")
 
         # For every vector in the projection matrix
         for feat_i in range(max_features):
@@ -408,6 +420,8 @@ cdef class BestObliqueSplitter(ObliqueSplitter):
                 # Account for projection vector
                 temp_d = 0.0
                 for j in range(best_split.proj_vec_indices.size()):
+                    with gil:
+                        print(self.X.shape, samples[p], j, deref(best_split.proj_vec_indices)[j])
                     temp_d += self.X[samples[p], deref(best_split.proj_vec_indices)[j]] *\
                                 deref(best_split.proj_vec_weights)[j]
 
@@ -691,12 +705,6 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         # replaces usage of max_features
         self.max_features_per_set = max_features_per_set
 
-    def __getstate__(self):
-        return {}
-
-    def __setstate__(self, d):
-        pass
-
     def __reduce__(self):
         """Enable pickling the splitter."""
         return (type(self),
@@ -732,16 +740,24 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         cdef intp_t feature_set_begin = 0
         cdef intp_t size_of_feature_set, size_of_sampling
         cdef intp_t ifeat = 0
+        cdef intp_t iproj = 0
+        
+        # the index to sample in the vectorized mtry x n_features grid
+        cdef intp_t index
+        
         for i_feature in range(self.n_feature_sets):
             # n_features * max_features_per_set
             size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin
             size_of_sampling = self.max_features_per_set[i_feature] * size_of_feature_set
 
             # push an index corresponding to each element we want to sample
+            # this pushes indices mtry_in_set * n_features_in_set
             for ifeat in range(size_of_sampling):
-                self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin)
-
-                print(i_feature, ifeat + feature_set_begin)
+                # index of the sampled feature in this feature set + feature set offset + projection offset
+                index = ifeat + feature_set_begin + (iproj * self.n_features)
+                self.multi_indices_to_sample[i_feature].push_back(index)
+                print('Inside init: ', i_feature, index, size_of_sampling, size_of_feature_set)
+            iproj += 1
             feature_set_begin = self.feature_set_ends[i_feature]
         return 0
 
@@ -756,7 +772,7 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         but now also uniformly samples features from each feature set.
         """
         cdef uint32_t* random_state = &self.rand_r_state
-        cdef intp_t feat_i, proj_i
+        cdef intp_t feat_i, proj_i, rand_vec_index
         cdef float32_t weight
 
         # keep track of the beginning and ending indices of each feature set
@@ -776,23 +792,41 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         # each feature-set equally
         while proj_i < self.max_features:
             # sample from a feature set
+            with gil:
+                print('Sampling projection: ', proj_i, self.n_samples, self.n_features, 
+                self.max_features, self.n_feature_sets, 
+                list(self.feature_set_ends[:]),
+                list(self.max_features_per_set[:]))
             for idx in range(self.n_feature_sets):
                 # get the max-features for this feature-set
                 max_features = self.max_features_per_set[idx]
 
                 grid_size = self.multi_indices_to_sample[idx].size()
+                with gil:
+                    print(self.multi_indices_to_sample[0].size())
+                    print(self.multi_indices_to_sample[1].size())
+
+                # for i in range(0, grid_size - 1):
+                #     j = rand_int(i + 1, grid_size, random_state)
+                #     swap[intp_t](self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j])
+
                 # Note: a temporary variable must not be used, else a copy will be made
-                for i in range(0, self.multi_indices_to_sample[idx].size() - 1):
+                for i in range(0, grid_size - 1):
                     j = rand_int(i + 1, grid_size, random_state)
                     self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j] = \
                         self.multi_indices_to_sample[idx][j], self.multi_indices_to_sample[idx][i]
 
                 for ifeature in range(max_features):
                     # sample random feature in this set
-                    feat_i = self.multi_indices_to_sample[idx][ifeature]
+                    rand_vec_index = self.multi_indices_to_sample[idx][ifeature]
 
                     # here, axis-aligned splits are entirely weights of 1
-                    weight = 1  # if (rand_int(0, 2, random_state) == 1) else -1
+                    weight = 1
+
+                    # get the projection index (i.e. row of the projection matrix) and
+                    # feature index (i.e. column of the projection matrix)
+                    proj_i = rand_vec_index // self.n_features
+                    feat_i = rand_vec_index % self.n_features
 
                     proj_mat_indices[proj_i].push_back(feat_i)  # Store vectorized index of nonzero
                     proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
@@ -800,7 +834,7 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
                     # XXX: debug only
                     if feat_i > self.n_features:
                         with gil:
-                            print(idx, ifeature, proj_i, self.n_samples, self.n_features, feat_i)
+                            print('Sampling projection: ', idx, ifeature, proj_i, self.n_samples, self.n_features, feat_i)
 
                     # break early if we've sampled enough features
                     proj_i += 1
@@ -809,7 +843,6 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
                 if proj_i >= self.max_features:
                     break
 
-
 # TODO: need to check segfault for multiview oblique splitter
 # REBUILD WITH BOUNDS CHECK
 cdef class MultiViewObliqueSplitter(MultiViewSplitter):
diff --git a/sktree/tree/tests/test_multiview.py b/sktree/tree/tests/test_multiview.py
index ad5bf143a..0d6bc4be3 100644
--- a/sktree/tree/tests/test_multiview.py
+++ b/sktree/tree/tests/test_multiview.py
@@ -27,6 +27,8 @@ def test_sklearn_compatible_estimator(estimator, check):
     check(estimator)
 
 
+
+@pytest.mark.skip()
 @pytest.mark.parametrize(
     "est", [MultiViewDecisionTreeClassifier, MultiViewObliqueDecisionTreeClassifier]
 )
diff --git a/test_mvoblique_tree.py b/test_mvoblique_tree.py
index c72aaba1b..347bb8d0f 100644
--- a/test_mvoblique_tree.py
+++ b/test_mvoblique_tree.py
@@ -18,63 +18,38 @@
 
 rng = np.random.default_rng(seed=seed)
 
-
-X_small = np.array(
-    [
-        [0, 0, 4, 0, 0, 0, 1, -14, 0, -4, 0, 0, 0, 0],
-        [0, 0, 5, 3, 0, -4, 0, 0, 1, -5, 0.2, 0, 4, 1],
-        [-1, -1, 0, 0, -4.5, 0, 0, 2.1, 1, 0, 0, -4.5, 0, 1],
-        [-1, -1, 0, -1.2, 0, 0, 0, 0, 0, 0, 0.2, 0, 0, 1],
-        [-1, -1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1],
-        [-1, -2, 0, 4, -3, 10, 4, 0, -3.2, 0, 4, 3, -4, 1],
-        [2.11, 0, -6, -0.5, 0, 11, 0, 0, -3.2, 6, 0.5, 0, -3, 1],
-        [2.11, 0, -6, -0.5, 0, 11, 0, 0, -3.2, 6, 0, 0, -2, 1],
-        [2.11, 8, -6, -0.5, 0, 11, 0, 0, -3.2, 6, 0, 0, -2, 1],
-        [2.11, 8, -6, -0.5, 0, 11, 0, 0, -3.2, 6, 0.5, 0, -1, 0],
-        [2, 8, 5, 1, 0.5, -4, 10, 0, 1, -5, 3, 0, 2, 0],
-        [2, 0, 1, 1, 1, -1, 1, 0, 0, -2, 3, 0, 1, 0],
-        [2, 0, 1, 2, 3, -1, 10, 2, 0, -1, 1, 2, 2, 0],
-        [1, 1, 0, 2, 2, -1, 1, 2, 0, -5, 1, 2, 3, 0],
-        [3, 1, 0, 3, 0, -4, 10, 0, 1, -5, 3, 0, 3, 1],
-        [2.11, 8, -6, -0.5, 0, 1, 0, 0, -3.2, 6, 0.5, 0, -3, 1],
-        [2.11, 8, -6, -0.5, 0, 1, 0, 0, -3.2, 6, 1.5, 1, -1, -1],
-        [2.11, 8, -6, -0.5, 0, 10, 0, 0, -3.2, 6, 0.5, 0, -1, -1],
-        [2, 0, 5, 1, 0.5, -2, 10, 0, 1, -5, 3, 1, 0, -1],
-        [2, 0, 1, 1, 1, -2, 1, 0, 0, -2, 0, 0, 0, 1],
-        [2, 1, 1, 1, 2, -1, 10, 2, 0, -1, 0, 2, 1, 1],
-        [1, 1, 0, 0, 1, -3, 1, 2, 0, -5, 1, 2, 1, 1],
-        [3, 1, 0, 1, 0, -4, 1, 0, 1, -2, 0, 0, 1, 0],
-    ]
+X = np.random.random((20, 10))
+y = np.random.randint(0, 2, size=20)
+
+# test with max_features as a float
+clf = MultiViewDecisionTreeClassifier(
+    random_state=seed,
+    feature_set_ends=[6, 10],
+    max_features=0.5,
 )
-
-y_small = [1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
-y_small_reg = [
-    1.0,
-    2.1,
-    1.2,
-    0.05,
-    10,
-    2.4,
-    3.1,
-    1.01,
-    0.01,
-    2.98,
-    3.1,
-    1.1,
-    0.0,
-    1.2,
-    2,
-    11,
-    0,
-    0,
-    4.5,
-    0.201,
-    1.06,
-    0.9,
-    0,
-]
-
-clf = MultiViewDecisionTreeClassifier(random_state=0)
-
-print(X_small.shape)
-clf.fit(X_small, y_small)
+clf.fit(X, y)
+
+assert_array_equal(clf.max_features_per_set_, [3, 2])
+assert clf.max_features_ == 5
+
+# test with max_features as sqrt
+# X = np.random.random((20, 13))
+# clf = MultiViewDecisionTreeClassifier(
+#     random_state=seed,
+#     feature_set_ends=[9, 13],
+#     max_features="sqrt",
+# )
+# clf.fit(X, y)
+# assert_array_equal(clf.max_features_per_set_, [3, 2])
+# assert clf.max_features_ == 5
+
+# # test with max_features as 'sqrt' but not a perfect square
+# X = np.random.random((20, 9))
+# clf = MultiViewDecisionTreeClassifier(
+#     random_state=seed,
+#     feature_set_ends=[5, 9],
+#     max_features="sqrt",
+# )
+# clf.fit(X, y)
+# assert_array_equal(clf.max_features_per_set_, [3, 2])
+# assert clf.max_features_ == 5
\ No newline at end of file

From 04daabede43b2a9ed179fd609deb7ffef4d538a3 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Wed, 3 Jul 2024 17:15:29 -0400
Subject: [PATCH 09/26] WIP

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/tree/_multiview.py           | 7 ++++++-
 sktree/tree/_oblique_splitter.pyx   | 8 ++++----
 sktree/tree/tests/test_multiview.py | 1 -
 test_mvoblique_tree.py              | 2 +-
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index 3934d3e31..80b570014 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -441,7 +441,12 @@ def _build_tree(
         # the total number of features to sample per split
         self.max_features_ = np.sum(self.max_features_per_set_)
 
-        print(self.max_features_, self.max_features_per_set_, self.feature_set_ends_, self.n_features_in_set_)
+        print(
+            self.max_features_,
+            self.max_features_per_set_,
+            self.feature_set_ends_,
+            self.n_features_in_set_,
+        )
         if not isinstance(self.splitter, ObliqueSplitter):
             splitter = SPLITTERS[self.splitter](
                 criterion,
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index aee8e2881..10c03e972 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -7,8 +7,8 @@
 import numpy as np
 
 from cython.operator cimport dereference as deref
-from libcpp.vector cimport vector
 from libcpp.algorithm cimport swap
+from libcpp.vector cimport vector
 
 from .._lib.sklearn.tree._criterion cimport Criterion
 from .._lib.sklearn.tree._utils cimport rand_int, rand_uniform
@@ -756,7 +756,7 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
                 # index of the sampled feature in this feature set + feature set offset + projection offset
                 index = ifeat + feature_set_begin + (iproj * self.n_features)
                 self.multi_indices_to_sample[i_feature].push_back(index)
-                print('Inside init: ', i_feature, index, size_of_sampling, size_of_feature_set)
+                print("Inside init: ", i_feature, index, size_of_sampling, size_of_feature_set)
             iproj += 1
             feature_set_begin = self.feature_set_ends[i_feature]
         return 0
@@ -793,7 +793,7 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         while proj_i < self.max_features:
             # sample from a feature set
             with gil:
-                print('Sampling projection: ', proj_i, self.n_samples, self.n_features, 
+                print("Sampling projection: ", proj_i, self.n_samples, self.n_features, 
                 self.max_features, self.n_feature_sets, 
                 list(self.feature_set_ends[:]),
                 list(self.max_features_per_set[:]))
@@ -834,7 +834,7 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
                     # XXX: debug only
                     if feat_i > self.n_features:
                         with gil:
-                            print('Sampling projection: ', idx, ifeature, proj_i, self.n_samples, self.n_features, feat_i)
+                            print("Sampling projection: ", idx, ifeature, proj_i, self.n_samples, self.n_features, feat_i)
 
                     # break early if we've sampled enough features
                     proj_i += 1
diff --git a/sktree/tree/tests/test_multiview.py b/sktree/tree/tests/test_multiview.py
index 0d6bc4be3..508c02d14 100644
--- a/sktree/tree/tests/test_multiview.py
+++ b/sktree/tree/tests/test_multiview.py
@@ -27,7 +27,6 @@ def test_sklearn_compatible_estimator(estimator, check):
     check(estimator)
 
 
-
 @pytest.mark.skip()
 @pytest.mark.parametrize(
     "est", [MultiViewDecisionTreeClassifier, MultiViewObliqueDecisionTreeClassifier]
diff --git a/test_mvoblique_tree.py b/test_mvoblique_tree.py
index 347bb8d0f..1fd1124ee 100644
--- a/test_mvoblique_tree.py
+++ b/test_mvoblique_tree.py
@@ -52,4 +52,4 @@
 # )
 # clf.fit(X, y)
 # assert_array_equal(clf.max_features_per_set_, [3, 2])
-# assert clf.max_features_ == 5
\ No newline at end of file
+# assert clf.max_features_ == 5

From ac87b0725f40fe1f70b9b41c78591f5f06dff152 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 11:44:05 -0400
Subject: [PATCH 10/26] Working prototype for multiview oblique

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 .../plot_multiview_axis_aligned_splitter.py   | 142 ++++++++-
 sktree/tree/_multiview.py                     |  66 ++++-
 sktree/tree/_oblique_splitter.pxd             |   4 +-
 sktree/tree/_oblique_splitter.pyx             | 276 ++++++++++++------
 sktree/tree/tests/test_multiview.py           |   8 +-
 test_mvoblique_tree.py                        |  55 ----
 6 files changed, 391 insertions(+), 160 deletions(-)
 delete mode 100644 test_mvoblique_tree.py

diff --git a/examples/splitters/plot_multiview_axis_aligned_splitter.py b/examples/splitters/plot_multiview_axis_aligned_splitter.py
index 00b8c0280..81ae5fae3 100644
--- a/examples/splitters/plot_multiview_axis_aligned_splitter.py
+++ b/examples/splitters/plot_multiview_axis_aligned_splitter.py
@@ -28,10 +28,10 @@
 from matplotlib.colors import ListedColormap
 
 from sktree._lib.sklearn.tree._criterion import Gini
-from sktree.tree._oblique_splitter import MultiViewSplitterTester
+from sktree.tree._oblique_splitter import MultiViewObliqueSplitterTester, MultiViewSplitterTester
 
 criterion = Gini(1, np.array((0, 1)))
-max_features = 5
+max_features = 6
 min_samples_leaf = 1
 min_weight_leaf = 0.0
 random_state = np.random.RandomState(10)
@@ -40,7 +40,7 @@
 feature_set_ends = np.array([3, 5, 9], dtype=np.intp)
 n_feature_sets = len(feature_set_ends)
 
-max_features_per_set_ = None
+max_features_per_set_ = np.array([2, 2, 2])
 feature_combinations = 1
 monotonic_cst = None
 missing_value_feature_mask = None
@@ -99,7 +99,11 @@
 for iend in feature_set_ends[1:]:
     ax.axvline(iend - 0.5, color="black", linewidth=1)
 
-ax.set(title="Sampled Projection Matrix", xlabel="Feature Index", ylabel="Projection Vector Index")
+ax.set(
+    title="Sampled Projection Matrix: \nMultiview Axis Aligned Split with Equal Max_Features",
+    xlabel="Feature Index",
+    ylabel="Projection Vector Index",
+)
 ax.set_xticks(np.arange(feature_set_ends[-1]))
 ax.set_yticks(np.arange(max_features))
 ax.set_yticklabels(np.arange(max_features, dtype=int) + 1)
@@ -115,6 +119,7 @@
 colorbar.set_label("Projection Weight (I.e. Sampled Feature From a Feature Set)")
 colorbar.ax.set_yticklabels(["0", "1"])
 
+fig.tight_layout()
 plt.show()
 
 # %%
@@ -160,7 +165,11 @@
 for iend in feature_set_ends[1:]:
     ax.axvline(iend - 0.5, color="black", linewidth=1)
 
-ax.set(title="Sampled Projection Matrix", xlabel="Feature Index", ylabel="Projection Vector Index")
+ax.set(
+    title="Sampled Projection Matrix:\n Multiview Axis-aligned Splitter",
+    xlabel="Feature Index",
+    ylabel="Projection Vector Index",
+)
 ax.set_xticks(np.arange(feature_set_ends[-1]))
 ax.set_yticks(np.arange(max_features))
 ax.set_yticklabels(np.arange(max_features, dtype=int) + 1)
@@ -176,6 +185,129 @@
 colorbar.set_label("Projection Weight (I.e. Sampled Feature From a Feature Set)")
 colorbar.ax.set_yticklabels(["0", "1"])
 
+fig.tight_layout()
+plt.show()
+
+# %%
+# Sampling multiview oblique splits
+# ---------------------------------
+# The multi-view splitter can also sample oblique splits. The oblique splits are
+# generated by sampling a projection matrix and then transforming the data into the
+# projected space.
+
+feature_combinations = 1.5
+cross_feature_set_sampling = False
+splitter = MultiViewObliqueSplitterTester(
+    criterion,
+    max_features,
+    min_samples_leaf,
+    min_weight_leaf,
+    random_state,
+    monotonic_cst,
+    feature_combinations,
+    feature_set_ends,
+    n_feature_sets,
+    max_features_per_set_,
+    cross_feature_set_sampling,
+)
+splitter.init_test(X, y, sample_weight, missing_value_feature_mask)
+
+# sample the projection matrix
+projection_matrix = splitter.sample_projection_matrix_py()
+print(projection_matrix)
+
+cmap = ListedColormap(["orange", "white", "green"])
+
+# Create a heatmap to visualize the indices
+fig, ax = plt.subplots(figsize=(6, 6))
+
+ax.imshow(
+    projection_matrix, cmap=cmap, aspect=feature_set_ends[-1] / max_features, interpolation="none"
+)
+ax.axvline(feature_set_ends[0] - 0.5, color="black", linewidth=1, label="Feature Sets")
+for iend in feature_set_ends[1:]:
+    ax.axvline(iend - 0.5, color="black", linewidth=1)
+
+ax.set(
+    title="Sampled Projection Matrix:\n Multiview Oblique Splits W/O Cross-Feature Sampling",
+    xlabel="Feature Index",
+    ylabel="Projection Vector Index",
+)
+ax.set_xticks(np.arange(feature_set_ends[-1]))
+ax.set_yticks(np.arange(max_features))
+ax.set_yticklabels(np.arange(max_features, dtype=int) + 1)
+ax.set_xticklabels(np.arange(feature_set_ends[-1], dtype=int) + 1)
+ax.legend()
+
+# Create a mappable object
+sm = ScalarMappable(cmap=cmap)
+sm.set_array([])  # You can set an empty array or values here
+
+# Create a color bar with labels for each feature set
+colorbar = fig.colorbar(sm, ax=ax, ticks=[0, 0.5, 1], format="%d")
+colorbar.set_label("Projection Weight")
+colorbar.ax.set_yticklabels(["-1", "0", "1"])
+
+fig.tight_layout()
+plt.show()
+
+# %%
+# Sampling multiview oblique splits with cross-feature-set sampling.
+# Now, we can also sample across feature sets within each projection vector.
+
+cross_feature_set_sampling = True
+splitter = MultiViewObliqueSplitterTester(
+    criterion,
+    max_features,
+    min_samples_leaf,
+    min_weight_leaf,
+    random_state,
+    monotonic_cst,
+    feature_combinations,
+    feature_set_ends,
+    n_feature_sets,
+    max_features_per_set_,
+    cross_feature_set_sampling,
+)
+splitter.init_test(X, y, sample_weight, missing_value_feature_mask)
+
+# sample the projection matrix
+projection_matrix = splitter.sample_projection_matrix_py()
+print(projection_matrix)
+
+cmap = ListedColormap(["orange", "white", "green"])
+
+# Create a heatmap to visualize the indices
+fig, ax = plt.subplots(figsize=(6, 6))
+
+ax.imshow(
+    projection_matrix, cmap=cmap, aspect=feature_set_ends[-1] / max_features, interpolation="none"
+)
+ax.axvline(feature_set_ends[0] - 0.5, color="black", linewidth=1, label="Feature Sets")
+for iend in feature_set_ends[1:]:
+    ax.axvline(iend - 0.5, color="black", linewidth=1)
+
+ax.set(
+    title="Sampled Projection Matrix:\n Multiview Oblique Splits W/ Cross-Feature Sampling",
+    xlabel="Feature Index",
+    ylabel="Projection Vector Index",
+)
+ax.set_xticks(np.arange(feature_set_ends[-1]))
+ax.set_yticks(np.arange(max_features))
+ax.set_yticklabels(np.arange(max_features, dtype=int) + 1)
+ax.set_xticklabels(np.arange(feature_set_ends[-1], dtype=int) + 1)
+ax.legend()
+
+# Create a mappable object
+sm = ScalarMappable(cmap=cmap)
+sm.set_array([])  # You can set an empty array or values here
+
+# Create a color bar with labels for each feature set
+colorbar = fig.colorbar(sm, ax=ax, ticks=[0, 0.5, 1], format="%d")
+colorbar.set_label("Projection Weight")
+colorbar.ax.set_yticklabels(["-1", "0", "1"])
+
+fig.tight_layout()
 plt.show()
 
 # %%
diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index 80b570014..e159bb621 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -441,12 +441,6 @@ def _build_tree(
         # the total number of features to sample per split
         self.max_features_ = np.sum(self.max_features_per_set_)
 
-        print(
-            self.max_features_,
-            self.max_features_per_set_,
-            self.feature_set_ends_,
-            self.n_features_in_set_,
-        )
         if not isinstance(self.splitter, ObliqueSplitter):
             splitter = SPLITTERS[self.splitter](
                 criterion,
@@ -851,6 +845,7 @@ class MultiViewObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassif
         "array-like",
         None,
     ]
+    _parameter_constraints["cross_feature_set_sampling"] = ["boolean"]
 
     def __init__(
         self,
@@ -871,6 +866,7 @@ def __init__(
         monotonic_cst=None,
         feature_set_ends=None,
         feature_combinations=None,
+        cross_feature_set_sampling=False,
     ):
         super().__init__(
             criterion=criterion,
@@ -891,6 +887,7 @@ def __init__(
 
         self.feature_set_ends = feature_set_ends
         self.feature_combinations = feature_combinations
+        self.cross_feature_set_sampling = cross_feature_set_sampling
         self._max_features_arr = None
 
     def _build_tree(
@@ -1053,6 +1050,7 @@ def _build_tree(
                 self.feature_set_ends_,
                 self.n_feature_sets_,
                 self.max_features_per_set_,
+                self.cross_feature_set_sampling,
             )
 
         self.tree_ = ObliqueTree(self.n_features_in_, self.n_classes_, self.n_outputs_)
@@ -1084,6 +1082,62 @@ def _build_tree(
             self.n_classes_ = self.n_classes_[0]
             self.classes_ = self.classes_[0]
 
+    def _fit(
+        self,
+        X,
+        y,
+        sample_weight=None,
+        check_input=True,
+        missing_values_in_feature_mask=None,
+        classes=None,
+    ):
+        # XXX: BaseDecisionTree does a check that requires max_features to not be a list/array-like
+        # so we temporarily set it to an acceptable value (None) while the parent `_fit` runs:
+        #  - self._max_features_arr holds the original, possibly array-like, max_features setting
+        #  - self.max_features is set to None for the duration of the parent call
+        #  - self.max_features is restored to the original value after the parent call returns
+        self._max_features_arr = self.max_features
+        self.max_features = None
+        super()._fit(X, y, sample_weight, check_input, missing_values_in_feature_mask, classes)
+        self.max_features = self._max_features_arr
+        return self
+
+    def fit(self, X, y, sample_weight=None, check_input=True, classes=None):
+        """Build a decision tree classifier from the training set (X, y).
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            The training input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csc_matrix``.
+
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+            The target values (class labels) as integers or strings.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights. If None, then samples are equally weighted. Splits
+            that would create child nodes with net zero or negative weight are
+            ignored while searching for a split in each node. Splits are also
+            ignored if they would result in any single class carrying a
+            negative weight in either child node.
+
+        check_input : bool, default=True
+            Allow to bypass several input checking.
+            Don't use this parameter unless you know what you're doing.
+
+        classes : array-like of shape (n_classes,), default=None
+            List of all the classes that can possibly appear in the y vector.
+
+        Returns
+        -------
+        self : MultiViewObliqueDecisionTreeClassifier
+            Fitted estimator.
+        """
+        return self._fit(
+            X, y, sample_weight=sample_weight, check_input=check_input, classes=classes
+        )
+
     @property
     def _inheritable_fitted_attribute(self):
         """Define additional attributes to pass onto a parent meta tree-estimator.
diff --git a/sktree/tree/_oblique_splitter.pxd b/sktree/tree/_oblique_splitter.pxd
index 9f6df2d3d..aea477043 100644
--- a/sktree/tree/_oblique_splitter.pxd
+++ b/sktree/tree/_oblique_splitter.pxd
@@ -165,7 +165,9 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
 
 # XXX: This splitter is experimental. Expect changes frequently.
 cdef class MultiViewObliqueSplitter(MultiViewSplitter):
-    cdef const intp_t[:] n_non_zeros_per_set  # the number of non-zero features in each feature set
+    # cdef const intp_t[:] n_non_zeros_per_set  # the number of non-zero features in each feature set
+    cdef intp_t _max_feature_combinations       # Number of non-zero features to sample per projection matrix
+    cdef bint cross_feature_set_sampling        # Whether we sample across feature sets when creating a projection vector
 
     cdef void sample_proj_mat(
         self,
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index 10c03e972..d52d760c1 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -7,6 +7,7 @@
 import numpy as np
 
 from cython.operator cimport dereference as deref
+from libc.math cimport ceil
 from libcpp.algorithm cimport swap
 from libcpp.vector cimport vector
 
@@ -134,7 +135,7 @@ cdef class BaseObliqueSplitter(Splitter):
         uint32_t* random_state,
     ) noexcept nogil:
         """Fisher-Yates shuffle for a 1D memoryview of indices.
-        
+
         Parameters
         ----------
         indices_to_sample : memoryview of intp_t
@@ -261,8 +262,7 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
         cdef intp_t n_non_zeros = self.n_non_zeros
         cdef uint32_t* random_state = &self.rand_r_state
 
-        cdef intp_t i, feat_i, proj_i, rand_vec_index
-        cdef float32_t weight
+        cdef intp_t i, rand_vec_index
 
         # construct an array to sample from mTry x n_features set of indices
         cdef intp_t[::1] indices_to_sample = self.indices_to_sample
@@ -288,6 +288,7 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
             proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
             proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
 
+
 cdef class BestObliqueSplitter(ObliqueSplitter):
     def __reduce__(self):
         """Enable pickling the splitter."""
@@ -343,9 +344,6 @@ cdef class BestObliqueSplitter(ObliqueSplitter):
         # Sample the projection matrix
         self.sample_proj_mat(self.proj_mat_weights, self.proj_mat_indices)
 
-        with gil:
-            print("Finished sampling projection matrix")
-
         # For every vector in the projection matrix
         for feat_i in range(max_features):
             # Projection vector has no nonzeros
@@ -420,8 +418,6 @@ cdef class BestObliqueSplitter(ObliqueSplitter):
                 # Account for projection vector
                 temp_d = 0.0
                 for j in range(best_split.proj_vec_indices.size()):
-                    with gil:
-                        print(self.X.shape, samples[p], j, deref(best_split.proj_vec_indices)[j])
                     temp_d += self.X[samples[p], deref(best_split.proj_vec_indices)[j]] *\
                                 deref(best_split.proj_vec_weights)[j]
 
@@ -738,27 +734,25 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         # create a helper array for allowing efficient Fisher-Yates
         cdef intp_t i_feature = 0
         cdef intp_t feature_set_begin = 0
-        cdef intp_t size_of_feature_set, size_of_sampling
+        cdef intp_t size_of_feature_set
         cdef intp_t ifeat = 0
-        cdef intp_t iproj = 0
-        
-        # the index to sample in the vectorized mtry x n_features grid
-        cdef intp_t index
-        
+
+        # Here, we store the indices of the features in each feature set as a separate
+        # vector. This is done to allow for efficient Fisher-Yates shuffling of the indices,
+        # such that we randomly sample features to consider, but within each feature set
+        # separately. This ensures that the sampled projection matrix consists of
+        # a balanced number of features from each feature set.
+        #
+        # Example:
+        # multi_indices_to_sample[0] = [0, 1, 2, 3]
+        # multi_indices_to_sample[1] = [4, 5]
+        # which corresponds to a feature set with 4 features and another with 2 features.
         for i_feature in range(self.n_feature_sets):
-            # n_features * max_features_per_set
             size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin
-            size_of_sampling = self.max_features_per_set[i_feature] * size_of_feature_set
-
-            # push an index corresponding to each element we want to sample
-            # this pushes indices mtry_in_set * n_features_in_set
-            for ifeat in range(size_of_sampling):
-                # index of the sampled feature in this feature set + feature set offset + projection offset
-                index = ifeat + feature_set_begin + (iproj * self.n_features)
-                self.multi_indices_to_sample[i_feature].push_back(index)
-                print("Inside init: ", i_feature, index, size_of_sampling, size_of_feature_set)
-            iproj += 1
+            for ifeat in range(size_of_feature_set):
+                self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin)
             feature_set_begin = self.feature_set_ends[i_feature]
+
         return 0
 
     cdef void sample_proj_mat(
@@ -772,7 +766,7 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         but now also uniformly samples features from each feature set.
         """
         cdef uint32_t* random_state = &self.rand_r_state
-        cdef intp_t feat_i, proj_i, rand_vec_index
+        cdef intp_t feat_i, proj_i
         cdef float32_t weight
 
         # keep track of the beginning and ending indices of each feature set
@@ -786,57 +780,33 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
         # of candidates, but if one feature set is exhausted, then that one is no longer sampled
         cdef intp_t i, j
 
+        # keep track of which mtry we are on
         proj_i = 0
 
         # 02: Algorithm samples a different number features from each set, but considers
         # each feature-set equally
         while proj_i < self.max_features:
             # sample from a feature set
-            with gil:
-                print("Sampling projection: ", proj_i, self.n_samples, self.n_features, 
-                self.max_features, self.n_feature_sets, 
-                list(self.feature_set_ends[:]),
-                list(self.max_features_per_set[:]))
             for idx in range(self.n_feature_sets):
                 # get the max-features for this feature-set
                 max_features = self.max_features_per_set[idx]
 
                 grid_size = self.multi_indices_to_sample[idx].size()
-                with gil:
-                    print(self.multi_indices_to_sample[0].size())
-                    print(self.multi_indices_to_sample[1].size())
-
-                # for i in range(0, grid_size - 1):
-                #     j = rand_int(i + 1, grid_size, random_state)
-                #     swap[intp_t](self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j])
-
                 # Note: a temporary variable must not be used, else a copy will be made
                 for i in range(0, grid_size - 1):
                     j = rand_int(i + 1, grid_size, random_state)
-                    self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j] = \
-                        self.multi_indices_to_sample[idx][j], self.multi_indices_to_sample[idx][i]
+                    swap[intp_t](self.multi_indices_to_sample[idx][i], self.multi_indices_to_sample[idx][j])
 
                 for ifeature in range(max_features):
                     # sample random feature in this set
-                    rand_vec_index = self.multi_indices_to_sample[idx][ifeature]
+                    feat_i = self.multi_indices_to_sample[idx][ifeature]
 
                     # here, axis-aligned splits are entirely weights of 1
-                    weight = 1
-
-                    # get the projection index (i.e. row of the projection matrix) and
-                    # feature index (i.e. column of the projection matrix)
-                    proj_i = rand_vec_index // self.n_features
-                    feat_i = rand_vec_index % self.n_features
+                    weight = 1  # if (rand_int(0, 2, random_state) == 1) else -1
 
-                    proj_mat_indices[proj_i].push_back(feat_i)  # Store vectorized index of nonzero
+                    proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
                     proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
 
-                    # XXX: debug only
-                    if feat_i > self.n_features:
-                        with gil:
-                            print("Sampling projection: ", idx, ifeature, proj_i, self.n_samples, self.n_features, feat_i)
-
-                    # break early if we've sampled enough features
                     proj_i += 1
                     if proj_i >= self.max_features:
                         break
@@ -858,6 +828,7 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
         const intp_t[:] feature_set_ends,
         intp_t n_feature_sets,
         const intp_t[:] max_features_per_set,
+        bint cross_feature_set_sampling,
         *argv
     ):
         self.feature_set_ends = feature_set_ends
@@ -869,11 +840,46 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
         self.max_features_per_set = max_features_per_set
 
         # compute # of non-zeros expected on average per feature set
-        cdef intp_t[:] n_non_zeros_per_set = np.zeros(self.n_feature_sets, dtype=np.intp)
-        cdef intp_t i
-        for i in range(self.n_feature_sets):
-            n_non_zeros_per_set[i] = <intp_t> (self.max_features_per_set[i] * self.feature_combinations)
-        self.n_non_zeros_per_set = n_non_zeros_per_set
+        # cdef intp_t[:] n_non_zeros_per_set = np.zeros(self.n_feature_sets, dtype=np.intp)
+        # cdef intp_t i
+        # for i in range(self.n_feature_sets):
+        #     n_non_zeros_per_set[i] = <intp_t> (self.max_features_per_set[i] * self.feature_combinations)
+        # self.n_non_zeros_per_set = n_non_zeros_per_set
+
+        self._max_feature_combinations = <intp_t> ceil(self.feature_combinations)
+        self.cross_feature_set_sampling = cross_feature_set_sampling
+
+    cdef int init(
+        self,
+        object X,
+        const float64_t[:, ::1] y,
+        const float64_t[:] sample_weight,
+        const unsigned char[::1] missing_values_in_feature_mask,
+    ) except -1:
+        Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
+
+        self.X = X
+
+        # create a helper array for allowing efficient Fisher-Yates
+        self.multi_indices_to_sample = vector[vector[intp_t]](self.n_feature_sets)
+
+        # Here, we sample the indices of the features to sample in each feature set
+        # as a separate vector. This is done to allow for efficient Fisher-Yates
+        # shuffling of the indices, such that we randomly sample features to consider, but within
+        # each feature set separately. This ensures that the sampled projection matrix consists of
+        # a balanced number of features from each feature set.
+        #
+        # Example:
+        # multi_indices_to_sample[0] = [0, 1, 2, 3]
+        # multi_indices_to_sample[1] = [4, 5]
+        # which corresponds to a feature set with 4 features and another with 2 features.
+        # for i_feature in range(self.n_feature_sets):
+        #     size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin
+        #     for ifeat in range(size_of_feature_set):
+        #         self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin)
+        #     feature_set_begin = self.feature_set_ends[i_feature]
+
+        return 0
 
     cdef void sample_proj_mat(
         self,
@@ -888,44 +894,52 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
         cdef intp_t n_features = self.n_features
         cdef uint32_t* random_state = &self.rand_r_state
 
-        cdef intp_t i, j, feat_i, proj_i, rand_vec_index
-        cdef float32_t weight
-
-        # construct an array to sample from mTry x n_features set of indices
-        cdef vector[intp_t] indices_to_sample
-        cdef intp_t grid_size
+        cdef intp_t i, rand_vec_index
 
         # keep track of the beginning and ending indices of each feature set
         cdef intp_t idx
 
-        # 02: Algorithm samples feature combinations from each feature set uniformly and evaluates
-        # them independently.
-        # sample from a feature set using linear combinations among the two sets
+        # random number of non-zeros to sample per projection vector
+        cdef intp_t n_non_zeros
+        cdef intp_t rand_feature_set
+        cdef intp_t current_feature_set_end = 0
+        cdef intp_t n_features_in_set, n_features_in_set_buff
+
+        # keep track of which projection vector we are analyzing
+        cdef intp_t proj_i = 0
+
+        # XXX: Compared to the oblique splitter, the multi-view oblique splitter differs in how
+        # it considers combinations of features. In the oblique splitter, we sample out of a mtry x n_features
+        # matrix, an expected number of non-zeros throughout the whole matrix. In the multi-view oblique splitter,
+        # we sample per mtry a non-zero projection vector. In the oblique splitter, this means that
+        # not every projection vector is actually non-zero, but in the multi-view oblique splitter, every
+        # projection vector is non-zero.
+        #
+        # As of 07/05/24, we could still change this in the oblique splitter, so we don't have trivial
+        # projection vectors.
+
+        # The algorithm for sampling a multi-view projection matrix proceeds as follows:
+        # 0. for each feature set, with a possibly different max_features:
+        # 1. Determine the number of non-zeros we want to sample: `rand_int(0, ceil(self.feature_combinations) + 1)` (upper bound exclusive).
+        # 2a. [Optional] If self.cross_feature_set_sampling, then while idx < n_non_zeros, sample a feature-set randomly
+        # 2b. sample a feature within feature-set randomly
+        # 2c. sample a weight randomly
         for idx in range(self.n_feature_sets):
-            # indices to sample is a 1D-index array of size (max_features * n_features_in_set)
-            # which is Fisher-Yates shuffled to sample random features in each feature set
-            indices_to_sample = self.multi_indices_to_sample[idx]
-            grid_size = indices_to_sample.size()
-
-            # shuffle indices over the 2D grid for this feature set to sample using Fisher-Yates
-            for i in range(0, grid_size):
-                j = rand_int(0, grid_size, random_state)
-                indices_to_sample[j], indices_to_sample[i] = \
-                    indices_to_sample[i], indices_to_sample[j]
-
-            # with gil:
-            #     print(idx, "Finished fisher yates...")
-            #     print(len(self.n_non_zeros_per_set), len(self.max_features_per_set), len(self.multi_indices_to_sample))
-            #     print(len(indices_to_sample), grid_size, self.n_non_zeros_per_set[idx])
-
-            # we want "n_non_zeros / K" for this feature set over K feature sets
-            for i in range(0, self.n_non_zeros_per_set[idx]):
-                # get the next index from the shuffled index array
-                rand_vec_index = indices_to_sample[i]
+            n_features_in_set = self.feature_set_ends[idx] - current_feature_set_end
+
+            # 0. sample mtry projection vectors for this feature set
+            for jdx in range(self.max_features_per_set[idx]):
+                # 1. Determine the number of non-zeros we want to sample in this feature set's mtry
+                # We add 1 since the upper bound is exclusive
+                n_non_zeros = rand_int(0, self._max_feature_combinations + 1, random_state)
 
+                # sample a random feature in the current feature set
+                rand_vec_index = rand_int(0, n_features_in_set, random_state) + current_feature_set_end
+
+                # push projection vector index and weight
                 # get the projection index (i.e. row of the projection matrix) and
                 # feature index (i.e. column of the projection matrix)
-                proj_i = rand_vec_index // n_features
+                # proj_i = rand_vec_index // n_features
                 feat_i = rand_vec_index % n_features
 
                 # sample a random weight
@@ -934,6 +948,88 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
                 proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
                 proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
 
+                # sample 'n_non_zeros' in a mtry_per_feature_set X n_features projection matrix
+                for i in range(1, n_non_zeros):
+                    if self.cross_feature_set_sampling:
+                        # sample a feature set randomly if we allow cross-sampling
+                        rand_feature_set = rand_int(0, self.n_feature_sets, random_state)
+                        n_features_in_set_buff = self.feature_set_ends[rand_feature_set]
+                        if rand_feature_set > 0:
+                            n_features_in_set_buff -= self.feature_set_ends[rand_feature_set - 1]
+                    else:
+                        rand_feature_set = idx
+                        n_features_in_set_buff = n_features_in_set
+
+                    # get another random feature in a possibly different feature set
+                    rand_vec_index = rand_int(0, n_features_in_set_buff, random_state)
+                    if rand_feature_set > 0:
+                        rand_vec_index += self.feature_set_ends[rand_feature_set - 1]
+
+                    # get the projection index (i.e. row of the projection matrix) and
+                    # feature index (i.e. column of the projection matrix)
+                    # proj_i = rand_vec_index // n_features
+                    feat_i = rand_vec_index % n_features
+
+                    # sample a random weight
+                    weight = 1 if (rand_int(0, 2, random_state) == 1) else -1
+
+                    proj_mat_indices[proj_i].push_back(feat_i)  # Store index of nonzero
+                    proj_mat_weights[proj_i].push_back(weight)  # Store weight of nonzero
+
+                # increment the projection vector we consider
+                proj_i += 1
+
+            # offset to sample features within the next feature set
+            current_feature_set_end = self.feature_set_ends[idx]
+
+
+cdef class MultiViewObliqueSplitterTester(MultiViewObliqueSplitter):
+    """A class to expose a Python interface for testing."""
+
+    cpdef sample_projection_matrix_py(self):
+        """Sample a multi-view oblique projection matrix.
+
+        Used for testing purposes.
+
+        Returns projection matrix of shape (max_features, n_features).
+        """
+        cdef vector[vector[float32_t]] proj_mat_weights = vector[vector[float32_t]](self.max_features)
+        cdef vector[vector[intp_t]] proj_mat_indices = vector[vector[intp_t]](self.max_features)
+        cdef intp_t i, j
+
+        # sample projection matrix in C/C++
+        self.sample_proj_mat(proj_mat_weights, proj_mat_indices)
+
+        # convert the projection matrix to something that can be used in Python
+        proj_vecs = np.zeros((self.max_features, self.n_features), dtype=np.float32)
+        for i in range(0, self.max_features):
+            for j in range(0, proj_mat_weights[i].size()):
+                weight = proj_mat_weights[i][j]
+                feat = proj_mat_indices[i][j]
+
+                proj_vecs[i, feat] = weight
+
+        return proj_vecs
+
+    cpdef init_test(self, X, y, sample_weight, missing_values_in_feature_mask=None):
+        """Initializes the state of the splitter.
+
+        Used for testing purposes.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            The input samples.
+        y : array-like, shape (n_samples,)
+            The target values (class labels in classification, real numbers in
+            regression).
+        sample_weight : array-like, shape (n_samples,)
+            Sample weights.
+        missing_values_in_feature_mask : array-like, shape (n_features,)
+            Whether or not a feature has missing values.
+        """
+        self.init(X, y, sample_weight, missing_values_in_feature_mask)
+
 
 cdef class MultiViewSplitterTester(MultiViewSplitter):
     """A class to expose a Python interface for testing."""
diff --git a/sktree/tree/tests/test_multiview.py b/sktree/tree/tests/test_multiview.py
index 508c02d14..409455caa 100644
--- a/sktree/tree/tests/test_multiview.py
+++ b/sktree/tree/tests/test_multiview.py
@@ -27,11 +27,13 @@ def test_sklearn_compatible_estimator(estimator, check):
     check(estimator)
 
 
-@pytest.mark.skip()
 @pytest.mark.parametrize(
-    "est", [MultiViewDecisionTreeClassifier, MultiViewObliqueDecisionTreeClassifier]
+    "est, baseline_est",
+    [
+        (MultiViewDecisionTreeClassifier, DecisionTreeClassifier),
+        (MultiViewDecisionTreeClassifier, MultiViewObliqueDecisionTreeClassifier),
+    ],
 )
-@pytest.mark.parametrize("baseline_est", [MultiViewDecisionTreeClassifier, DecisionTreeClassifier])
 def test_multiview_classification(baseline_est, est):
     """Test that explicit knowledge of multi-view structure improves classification accuracy.
 
diff --git a/test_mvoblique_tree.py b/test_mvoblique_tree.py
deleted file mode 100644
index 1fd1124ee..000000000
--- a/test_mvoblique_tree.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import math
-
-import numpy as np
-import pytest
-from numpy.testing import assert_array_equal
-from sklearn.datasets import make_blobs
-from sklearn.metrics import accuracy_score
-from sklearn.model_selection import cross_val_score
-from sklearn.utils.estimator_checks import parametrize_with_checks
-
-from sktree.tree import (
-    DecisionTreeClassifier,
-    MultiViewDecisionTreeClassifier,
-    MultiViewObliqueDecisionTreeClassifier,
-)
-
-seed = 12345
-
-rng = np.random.default_rng(seed=seed)
-
-X = np.random.random((20, 10))
-y = np.random.randint(0, 2, size=20)
-
-# test with max_features as a float
-clf = MultiViewDecisionTreeClassifier(
-    random_state=seed,
-    feature_set_ends=[6, 10],
-    max_features=0.5,
-)
-clf.fit(X, y)
-
-assert_array_equal(clf.max_features_per_set_, [3, 2])
-assert clf.max_features_ == 5
-
-# test with max_features as sqrt
-# X = np.random.random((20, 13))
-# clf = MultiViewDecisionTreeClassifier(
-#     random_state=seed,
-#     feature_set_ends=[9, 13],
-#     max_features="sqrt",
-# )
-# clf.fit(X, y)
-# assert_array_equal(clf.max_features_per_set_, [3, 2])
-# assert clf.max_features_ == 5
-
-# # test with max_features as 'sqrt' but not a perfect square
-# X = np.random.random((20, 9))
-# clf = MultiViewDecisionTreeClassifier(
-#     random_state=seed,
-#     feature_set_ends=[5, 9],
-#     max_features="sqrt",
-# )
-# clf.fit(X, y)
-# assert_array_equal(clf.max_features_per_set_, [3, 2])
-# assert clf.max_features_ == 5

From 81493fecea0edd171c1871f1564e05e9a0c9562f Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 11:45:28 -0400
Subject: [PATCH 11/26] Working prototype for multiview oblique

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/tree/_multiview.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index e159bb621..0faaf53d9 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -772,6 +772,14 @@ class MultiViewObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassif
         next 20 features, then ``feature_set_ends = [10, 30]``. If ``None``,
         then this will assume that there is only one feature set.
 
+    feature_combinations : float, default=None
+        The number of feature combinations to consider at each split.
+        If None, then this will default to the number of features in the
+        respective feature set.
+
+    cross_feature_set_sampling : bool, default=False
+        Whether to sample features across feature sets during the oblique splits.
+
     Attributes
     ----------
     classes_ : ndarray of shape (n_classes,) or list of ndarray

From c6824b9d71728d26ed5a5dd91199e0ae5a4da4ab Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 12:24:29 -0400
Subject: [PATCH 12/26] Add mvrf

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 doc/whats_new/v0.9.rst                |   6 +-
 sktree/__init__.py                    |   7 +-
 sktree/ensemble/__init__.py           |   2 +-
 sktree/ensemble/_multiview.py         | 293 +++++++++++++++++++++++++-
 sktree/tests/test_multiview_forest.py |   7 +-
 5 files changed, 310 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst
index 696929b5e..6a7a4badf 100644
--- a/doc/whats_new/v0.9.rst
+++ b/doc/whats_new/v0.9.rst
@@ -17,7 +17,11 @@ Changelog
     ``apply_max_features_per_feature_set`` argument anymore. Instead, the
     ``max_features`` argument is used to control the number of features to
     consider when looking for the best split within each feature set explicitly.
-    By `Adam Li`_ :pr:`#247`.
+    By `Adam Li`_ :pr:`#265`.
+
+- |Feature| :class:`sktree.tree.MultiViewObliqueDecisionTreeClassifier` is implemented
+    along with its forest version :class:`sktree.ensemble.MultiViewObliqueRandomForestClassifier`.
+    By `Adam Li`_ :pr:`#265`.
 
 Code and Documentation Contributors
 -----------------------------------
diff --git a/sktree/__init__.py b/sktree/__init__.py
index 58636a24c..07dede4d5 100644
--- a/sktree/__init__.py
+++ b/sktree/__init__.py
@@ -45,7 +45,11 @@
             ExtraTreesRegressor,
         )
         from .neighbors import NearestNeighborsMetaEstimator
-        from .ensemble import ExtendedIsolationForest, MultiViewRandomForestClassifier
+        from .ensemble import (
+            ExtendedIsolationForest,
+            MultiViewRandomForestClassifier,
+            MultiViewObliqueRandomForestClassifier,
+        )
         from .ensemble._unsupervised_forest import (
             UnsupervisedRandomForest,
             UnsupervisedObliqueRandomForest,
@@ -88,4 +92,5 @@
         "ExtraTreesRegressor",
         "ExtendedIsolationForest",
         "MultiViewRandomForestClassifier",
+        "MultiViewObliqueRandomForestClassifier",
     ]
diff --git a/sktree/ensemble/__init__.py b/sktree/ensemble/__init__.py
index aa97d0215..15955dc5a 100644
--- a/sktree/ensemble/__init__.py
+++ b/sktree/ensemble/__init__.py
@@ -1,6 +1,6 @@
 from ._eiforest import ExtendedIsolationForest
 from ._honest_forest import HonestForestClassifier
-from ._multiview import MultiViewRandomForestClassifier
+from ._multiview import MultiViewObliqueRandomForestClassifier, MultiViewRandomForestClassifier
 from ._supervised_forest import (
     ExtraObliqueRandomForestClassifier,
     ExtraObliqueRandomForestRegressor,
diff --git a/sktree/ensemble/_multiview.py b/sktree/ensemble/_multiview.py
index 828212335..a44767f0d 100644
--- a/sktree/ensemble/_multiview.py
+++ b/sktree/ensemble/_multiview.py
@@ -1,7 +1,7 @@
 from sklearn.utils._param_validation import StrOptions
 
 from .._lib.sklearn.ensemble._forest import ForestClassifier
-from ..tree import MultiViewDecisionTreeClassifier
+from ..tree import MultiViewDecisionTreeClassifier, MultiViewObliqueDecisionTreeClassifier
 from ..tree._neighbors import SimMatrixMixin
 from ._extensions import ForestClassifierMixin, ForestMixin
 
@@ -292,3 +292,294 @@ def __init__(
         self.min_weight_fraction_leaf = min_weight_fraction_leaf
         self.max_leaf_nodes = max_leaf_nodes
         self.min_impurity_decrease = min_impurity_decrease
+
+
+class MultiViewObliqueRandomForestClassifier(MultiViewDecisionTreeClassifier):
+    """
+    A multi-view axis-aligned random forest classifier.
+
+    A multi-view random forest is a meta estimator similar to a random
+    forest that fits a number of multi-view decision tree classifiers
+    on various sub-samples of the dataset and uses averaging to
+    improve the predictive accuracy and control over-fitting.
+
+    Parameters
+    ----------
+    n_estimators : int, default=100
+        The number of trees in the forest.
+
+    criterion : {"gini", "entropy"}, default="gini"
+        The function to measure the quality of a split. Supported criteria are
+        "gini" for the Gini impurity and "entropy" for the information gain.
+        Note: this parameter is tree-specific.
+
+    max_depth : int, default=None
+        The maximum depth of the tree. If None, then nodes are expanded until
+        all leaves are pure or until all leaves contain less than
+        min_samples_split samples.
+
+    min_samples_split : int or float, default=2
+        The minimum number of samples required to split an internal node:
+
+        - If int, then consider `min_samples_split` as the minimum number.
+        - If float, then `min_samples_split` is a fraction and
+          `ceil(min_samples_split * n_samples)` are the minimum
+          number of samples for each split.
+
+    min_samples_leaf : int or float, default=1
+        The minimum number of samples required to be at a leaf node.
+        A split point at any depth will only be considered if it leaves at
+        least ``min_samples_leaf`` training samples in each of the left and
+        right branches.  This may have the effect of smoothing the model,
+        especially in regression.
+
+        - If int, then consider `min_samples_leaf` as the minimum number.
+        - If float, then `min_samples_leaf` is a fraction and
+          `ceil(min_samples_leaf * n_samples)` are the minimum
+          number of samples for each node.
+
+    min_weight_fraction_leaf : float, default=0.0
+        The minimum weighted fraction of the sum total of weights (of all
+        the input samples) required to be at a leaf node. Samples have
+        equal weight when sample_weight is not provided.
+
+    max_features : {"sqrt", "log2", None}, int or float, default="sqrt"
+        The number of features to consider when looking for the best split:
+
+        - If int, then consider `max_features` features at each split.
+        - If float, then `max_features` is a fraction and
+          `round(max_features * n_features)` features are considered at each
+          split.
+        - If "auto", then `max_features=sqrt(n_features)`.
+        - If "sqrt", then `max_features=sqrt(n_features)`.
+        - If "log2", then `max_features=log2(n_features)`.
+        - If None, then `max_features=n_features`.
+
+        Note: the search for a split does not stop until at least one
+        valid partition of the node samples is found, even if it requires to
+        effectively inspect more than ``max_features`` features.
+
+    max_leaf_nodes : int, default=None
+        Grow trees with ``max_leaf_nodes`` in best-first fashion.
+        Best nodes are defined as relative reduction in impurity.
+        If None then unlimited number of leaf nodes.
+
+    min_impurity_decrease : float, default=0.0
+        A node will be split if this split induces a decrease of the impurity
+        greater than or equal to this value.
+
+        The weighted impurity decrease equation is the following::
+
+            N_t / N * (impurity - N_t_R / N_t * right_impurity
+                                - N_t_L / N_t * left_impurity)
+
+        where ``N`` is the total number of samples, ``N_t`` is the number of
+        samples at the current node, ``N_t_L`` is the number of samples in the
+        left child, and ``N_t_R`` is the number of samples in the right child.
+
+        ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
+        if ``sample_weight`` is passed.
+
+    bootstrap : bool, default=True
+        Whether bootstrap samples are used when building trees. If False, the
+        whole dataset is used to build each tree.
+
+    oob_score : bool, default=False
+        Whether to use out-of-bag samples to estimate the generalization score.
+        Only available if bootstrap=True.
+
+    n_jobs : int, default=None
+        The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,
+        :meth:`decision_path` and :meth:`apply` are all parallelized over the
+        trees. ``None`` means 1 unless in a `joblib.parallel_backend`
+        context. ``-1`` means using all processors. See :term:`Glossary
+        <n_jobs>` for more details.
+
+    random_state : int, RandomState instance or None, default=None
+        Controls both the randomness of the bootstrapping of the samples used
+        when building trees (if ``bootstrap=True``) and the sampling of the
+        features to consider when looking for the best split at each node
+        (if ``max_features < n_features``).
+        See :term:`Glossary <random_state>` for details.
+
+    verbose : int, default=0
+        Controls the verbosity when fitting and predicting.
+
+    warm_start : bool, default=False
+        When set to ``True``, reuse the solution of the previous call to fit
+        and add more estimators to the ensemble, otherwise, just fit a whole
+        new forest. See :term:`the Glossary <warm_start>`.
+
+    class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \
+            default=None
+        Weights associated with classes in the form ``{class_label: weight}``.
+        If not given, all classes are supposed to have weight one. For
+        multi-output problems, a list of dicts can be provided in the same
+        order as the columns of y.
+
+        Note that for multioutput (including multilabel) weights should be
+        defined for each class of every column in its own dict. For example,
+        for four-class multilabel classification weights should be
+        [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of
+        [{1:1}, {2:5}, {3:1}, {4:1}].
+
+        The "balanced" mode uses the values of y to automatically adjust
+        weights inversely proportional to class frequencies in the input data
+        as ``n_samples / (n_classes * np.bincount(y))``
+
+        The "balanced_subsample" mode is the same as "balanced" except that
+        weights are computed based on the bootstrap sample for every tree
+        grown.
+
+        For multi-output, the weights of each column of y will be multiplied.
+
+        Note that these weights will be multiplied with sample_weight (passed
+        through the fit method) if sample_weight is specified.
+
+    max_samples : int or float, default=None
+        If bootstrap is True, the number of samples to draw from X
+        to train each base estimator.
+
+        - If None (default), then draw `X.shape[0]` samples.
+        - If int, then draw `max_samples` samples.
+        - If float, then draw `max_samples * X.shape[0]` samples. Thus,
+          `max_samples` should be in the interval `(0.0, 1.0]`.
+
+    feature_set_ends : array-like of int of shape (n_feature_sets,), default=None
+        The indices of the end of each feature set. For example, if the first
+        feature set is the first 10 features, and the second feature set is the
+        next 20 features, then ``feature_set_ends = [10, 30]``. If ``None``,
+        then this will assume that there is only one feature set.
+
+    Attributes
+    ----------
+    estimators_ : list of sktree.tree.ObliqueDecisionTreeClassifier
+        The collection of fitted sub-estimators.
+
+    classes_ : ndarray of shape (n_classes,) or a list of such arrays
+        The classes labels (single output problem), or a list of arrays of
+        class labels (multi-output problem).
+
+    n_classes_ : int or list
+        The number of classes (single output problem), or a list containing the
+        number of classes for each output (multi-output problem).
+
+    n_features_ : int
+        The number of features when ``fit`` is performed.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+    n_outputs_ : int
+        The number of outputs when ``fit`` is performed.
+
+    feature_importances_ : ndarray of shape (n_features,)
+        The impurity-based feature importances.
+        The higher, the more important the feature.
+        The importance of a feature is computed as the (normalized)
+        total reduction of the criterion brought by that feature.  It is also
+        known as the Gini importance.
+
+        Warning: impurity-based feature importances can be misleading for
+        high cardinality features (many unique values). See
+        :func:`sklearn.inspection.permutation_importance` as an alternative.
+
+    oob_score_ : float
+        Score of the training dataset obtained using an out-of-bag estimate.
+        This attribute exists only when ``oob_score`` is True.
+
+    oob_decision_function_ : ndarray of shape (n_samples, n_classes) or \
+            (n_samples, n_classes, n_outputs)
+        Decision function computed with out-of-bag estimate on the training
+        set. If n_estimators is small it might be possible that a data point
+        was never left out during the bootstrap. In this case,
+        `oob_decision_function_` might contain NaN. This attribute exists
+        only when ``oob_score`` is True.
+
+    See Also
+    --------
+    sktree.tree.ObliqueDecisionTreeClassifier : An oblique decision
+        tree classifier.
+    sklearn.ensemble.RandomForestClassifier : An axis-aligned decision
+        forest classifier.
+    """
+
+    tree_type = "oblique"
+    _parameter_constraints: dict = {
+        **MultiViewObliqueDecisionTreeClassifier._parameter_constraints,
+        "class_weight": [
+            StrOptions({"balanced_subsample", "balanced"}),
+            dict,
+            list,
+            None,
+        ],
+    }
+    _parameter_constraints.pop("splitter")
+
+    def __init__(
+        self,
+        n_estimators=100,
+        *,
+        criterion="gini",
+        max_depth=None,
+        min_samples_split=2,
+        min_samples_leaf=1,
+        min_weight_fraction_leaf=0.0,
+        max_features="sqrt",
+        max_leaf_nodes=None,
+        min_impurity_decrease=0.0,
+        bootstrap=True,
+        oob_score=False,
+        n_jobs=None,
+        random_state=None,
+        verbose=0,
+        warm_start=False,
+        class_weight=None,
+        max_samples=None,
+        feature_set_ends=None,
+        feature_combinations=None,
+        cross_feature_set_sampling=False,
+    ):
+        super().__init__(
+            estimator=MultiViewObliqueDecisionTreeClassifier(),
+            n_estimators=n_estimators,
+            estimator_params=(
+                "criterion",
+                "max_depth",
+                "min_samples_split",
+                "min_samples_leaf",
+                "min_weight_fraction_leaf",
+                "max_features",
+                "max_leaf_nodes",
+                "min_impurity_decrease",
+                "random_state",
+                "feature_set_ends",
+                "feature_combinations",
+                "cross_feature_set_sampling",
+            ),
+            bootstrap=bootstrap,
+            oob_score=oob_score,
+            n_jobs=n_jobs,
+            random_state=random_state,
+            verbose=verbose,
+            warm_start=warm_start,
+            class_weight=class_weight,
+            max_samples=max_samples,
+        )
+        self.criterion = criterion
+        self.max_depth = max_depth
+        self.min_samples_split = min_samples_split
+        self.min_samples_leaf = min_samples_leaf
+        self.max_features = max_features
+        self.feature_set_ends = feature_set_ends
+        self.feature_combinations = feature_combinations
+        self.cross_feature_set_sampling = cross_feature_set_sampling
+
+        # unused by oblique forests
+        self.min_weight_fraction_leaf = min_weight_fraction_leaf
+        self.max_leaf_nodes = max_leaf_nodes
+        self.min_impurity_decrease = min_impurity_decrease
diff --git a/sktree/tests/test_multiview_forest.py b/sktree/tests/test_multiview_forest.py
index da168bbba..d1a66a380 100644
--- a/sktree/tests/test_multiview_forest.py
+++ b/sktree/tests/test_multiview_forest.py
@@ -6,7 +6,11 @@
 from sklearn.model_selection import cross_val_score, train_test_split
 from sklearn.utils.estimator_checks import parametrize_with_checks
 
-from sktree import MultiViewRandomForestClassifier, RandomForestClassifier
+from sktree import (
+    MultiViewObliqueRandomForestClassifier,
+    MultiViewRandomForestClassifier,
+    RandomForestClassifier,
+)
 from sktree.datasets.multiview import make_joint_factor_model
 
 seed = 12345
@@ -15,6 +19,7 @@
 @parametrize_with_checks(
     [
         MultiViewRandomForestClassifier(random_state=12345, n_estimators=10),
+        MultiViewObliqueRandomForestClassifier(random_state=12345, n_estimators=10),
     ]
 )
 def test_sklearn_compatible_estimator(estimator, check):

From f546194db4477f2722b2a744e627bfe2324e53d0 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 12:57:05 -0400
Subject: [PATCH 13/26] Enable multiview oblique rf tests

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/ensemble/_multiview.py         | 10 +++++++++-
 sktree/tests/test_multiview_forest.py | 19 +++++++++++++++----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/sktree/ensemble/_multiview.py b/sktree/ensemble/_multiview.py
index a44767f0d..4f85ddbd3 100644
--- a/sktree/ensemble/_multiview.py
+++ b/sktree/ensemble/_multiview.py
@@ -294,7 +294,7 @@ def __init__(
         self.min_impurity_decrease = min_impurity_decrease
 
 
-class MultiViewObliqueRandomForestClassifier(MultiViewDecisionTreeClassifier):
+class MultiViewObliqueRandomForestClassifier(MultiViewRandomForestClassifier):
     """
     A multi-view axis-aligned random forest classifier.
 
@@ -451,6 +451,14 @@ class MultiViewObliqueRandomForestClassifier(MultiViewDecisionTreeClassifier):
         next 20 features, then ``feature_set_ends = [10, 30]``. If ``None``,
         then this will assume that there is only one feature set.
 
+    feature_combinations : float, default=None
+        The number of feature combinations to consider at each split.
+        If None, then this will default to the number of features in the
+        respective feature set.
+
+    cross_feature_set_sampling : bool, default=False
+        Whether to sample features across feature sets during the oblique splits.
+
     Attributes
     ----------
     estimators_ : list of sktree.tree.ObliqueDecisionTreeClassifier
diff --git a/sktree/tests/test_multiview_forest.py b/sktree/tests/test_multiview_forest.py
index d1a66a380..1a2e1be1c 100644
--- a/sktree/tests/test_multiview_forest.py
+++ b/sktree/tests/test_multiview_forest.py
@@ -26,8 +26,18 @@ def test_sklearn_compatible_estimator(estimator, check):
     check(estimator)
 
 
-@pytest.mark.parametrize("baseline_est", [RandomForestClassifier])
-def test_multiview_classification(baseline_est):
+@pytest.mark.parametrize(
+    "mv_est, kwargs",
+    [
+        (MultiViewRandomForestClassifier, dict()),
+        (MultiViewObliqueRandomForestClassifier, dict(feature_combinations=2)),
+        (
+            MultiViewObliqueRandomForestClassifier,
+            dict(feature_combinations=2, cross_feature_set_sampling=True),
+        ),
+    ],
+)
+def test_multiview_classification(mv_est, kwargs):
     """Test that explicit knowledge of multi-view structure improves classification accuracy.
 
     In very high-dimensional noise setting across two views, when the max_depth and max_features
@@ -66,12 +76,13 @@ def test_multiview_classification(baseline_est):
     y = np.hstack((y0, y1)).T
 
     # Compare multiview decision tree vs single-view decision tree
-    clf = MultiViewRandomForestClassifier(
+    clf = mv_est(
         random_state=seed,
         feature_set_ends=[n_features_1, X.shape[1]],
         max_features="sqrt",
         max_depth=4,
         n_estimators=n_estimators,
+        **kwargs,
     )
     clf.fit(X, y)
     assert (
@@ -81,7 +92,7 @@ def test_multiview_classification(baseline_est):
         cross_val_score(clf, X, y, cv=5).mean() == 1.0
     ), f"CV score: {cross_val_score(clf, X, y, cv=5).mean()}"
 
-    clf = baseline_est(
+    clf = RandomForestClassifier(
         random_state=seed,
         max_depth=4,
         max_features="sqrt",

From a1c6313a34e81e1d30a09ca161426936c9887a1c Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 12:58:13 -0400
Subject: [PATCH 14/26] Add to api.rst

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 doc/api.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/api.rst b/doc/api.rst
index 0c112b384..fb3c24d13 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -66,6 +66,7 @@ how scikit-learn builds trees.
    PatchObliqueRandomForestRegressor
    HonestForestClassifier
    MultiViewRandomForestClassifier
+   MultiViewObliqueRandomForestClassifier
 
 .. currentmodule:: sktree.tree
 .. autosummary::
@@ -77,6 +78,7 @@ how scikit-learn builds trees.
    PatchObliqueDecisionTreeRegressor
    HonestTreeClassifier
    MultiViewDecisionTreeClassifier
+   MultiViewObliqueDecisionTreeClassifier
 
 Unsupervised
 ------------

From 48997cfaabcc9d5d0a57f29e89cf089f72cd74fe Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 13:51:43 -0400
Subject: [PATCH 15/26] Add to api.rst

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/ensemble/_multiview.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sktree/ensemble/_multiview.py b/sktree/ensemble/_multiview.py
index 4f85ddbd3..9fbdd6d88 100644
--- a/sktree/ensemble/_multiview.py
+++ b/sktree/ensemble/_multiview.py
@@ -294,7 +294,9 @@ def __init__(
         self.min_impurity_decrease = min_impurity_decrease
 
 
-class MultiViewObliqueRandomForestClassifier(MultiViewRandomForestClassifier):
+class MultiViewObliqueRandomForestClassifier(
+    SimMatrixMixin, ForestClassifierMixin, ForestMixin, ForestClassifier
+):
     """
     A multi-view axis-aligned random forest classifier.
 

From ef1dc6bf705e579737906d144255f5a807183f80 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 14:33:07 -0400
Subject: [PATCH 16/26] Fix unit tests

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/tree/_multiview.py         |  2 +-
 sktree/tree/_oblique_splitter.pyx | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index 0faaf53d9..b0becd559 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -493,7 +493,7 @@ def _update_tree(self, X, y, sample_weight):
         # set decision-tree model parameters
         max_depth = np.iinfo(np.int32).max if self.max_depth is None else self.max_depth
 
-        monotonic_cst = self.monotonic_cst_
+        monotonic_cst = None
 
         # Build tree
         # Note: this reconstructs the builder with the same state it had during the
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index d52d760c1..7982c1de9 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -849,6 +849,23 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
         self._max_feature_combinations = <intp_t> ceil(self.feature_combinations)
         self.cross_feature_set_sampling = cross_feature_set_sampling
 
+    def __reduce__(self):
+        """Enable pickling the splitter."""
+        return (type(self),
+                (
+                    self.criterion,
+                    self.max_features,
+                    self.min_samples_leaf,
+                    self.min_weight_leaf,
+                    self.random_state,
+                    self.monotonic_cst.base if self.monotonic_cst is not None else None,
+                    self.feature_combinations,
+                    self.feature_set_ends.base if self.feature_set_ends is not None else None,
+                    self.n_feature_sets,
+                    self.max_features_per_set.base if self.max_features_per_set is not None else None,
+                    self.cross_feature_set_sampling,
+                ), self.__getstate__())
+
     cdef int init(
         self,
         object X,

From 9dbac9b467ea58e3c56f412ab8c0af75de14862f Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 16:13:54 -0400
Subject: [PATCH 17/26] Fix unit tests

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/stats/tests/test_forestht.py |  3 --
 sktree/tree/_multiview.py           | 79 +++++++++++++++++++++++++++--
 sktree/tree/tests/test_multiview.py |  4 +-
 3 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 0e08a3e5b..e2dbe20ed 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -83,7 +83,6 @@ def test_small_dataset_independent(seed):
         stratify=True,
         tree_estimator=MultiViewDecisionTreeClassifier(
             feature_set_ends=feature_set_ends,
-            apply_max_features_per_feature_set=True,
         ),
     )
     perm_clf = PermutationHonestForestClassifier(
@@ -97,7 +96,6 @@ def test_small_dataset_independent(seed):
         stratify=True,
         tree_estimator=MultiViewDecisionTreeClassifier(
             feature_set_ends=feature_set_ends,
-            apply_max_features_per_feature_set=True,
         ),
     )
     result = build_coleman_forest(
@@ -208,7 +206,6 @@ def test_comight_repeated_feature_sets(seed):
         stratify=True,
         tree_estimator=MultiViewDecisionTreeClassifier(
             feature_set_ends=feature_set_ends,
-            apply_max_features_per_feature_set=True,
         ),
     )
 
diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index b0becd559..f4d4c5207 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -493,8 +493,6 @@ def _update_tree(self, X, y, sample_weight):
         # set decision-tree model parameters
         max_depth = np.iinfo(np.int32).max if self.max_depth is None else self.max_depth
 
-        monotonic_cst = None
-
         # Build tree
         # Note: this reconstructs the builder with the same state it had during the
         # initial fit. This is necessary because the builder is not saved as part
@@ -524,7 +522,7 @@ def _update_tree(self, X, y, sample_weight):
                 min_samples_leaf,
                 min_weight_leaf,
                 random_state,
-                monotonic_cst,
+                self.monotonic_cst_,
                 self._feature_combinations_,
                 self.feature_set_ends_,
                 self.n_feature_sets_,
@@ -951,6 +949,7 @@ def _build_tree(
             Controls the randomness of the estimator.
         """
         monotonic_cst = None
+        self.monotonic_cst_ = monotonic_cst
         _, n_features = X.shape
 
         self.feature_combinations_ = (
@@ -1160,3 +1159,77 @@ def _inheritable_fitted_attribute(self):
             "max_features_per_set_",
             "feature_combinations_",
         ]
+
+    def _update_tree(self, X, y, sample_weight):
+        # Update tree
+        max_leaf_nodes = -1 if self.max_leaf_nodes is None else self.max_leaf_nodes
+        min_samples_split = self.min_samples_split_
+        min_samples_leaf = self.min_samples_leaf_
+        min_weight_leaf = self.min_weight_leaf_
+        # set decision-tree model parameters
+        max_depth = np.iinfo(np.int32).max if self.max_depth is None else self.max_depth
+
+        # Build tree
+        # Note: this reconstructs the builder with the same state it had during the
+        # initial fit. This is necessary because the builder is not saved as part
+        # of the class, and thus the state may be lost if pickled/unpickled.
+        criterion = self.criterion
+        if not isinstance(criterion, BaseCriterion):
+            criterion = CRITERIA_CLF[self.criterion](self.n_outputs_, self._n_classes_)
+        else:
+            # Make a deepcopy in case the criterion has mutable attributes that
+            # might be shared and modified concurrently during parallel fitting
+            criterion = copy.deepcopy(criterion)
+
+        random_state = check_random_state(self.random_state)
+
+        splitter = self.splitter
+        if issparse(X):
+            raise ValueError(
+                "Sparse input is not supported for oblique trees. "
+                "Please convert your data to a dense array."
+            )
+        else:
+            SPLITTERS = OBLIQUE_DENSE_SPLITTERS
+        if not isinstance(self.splitter, ObliqueSplitter):
+            splitter = SPLITTERS[self.splitter](
+                criterion,
+                self.max_features_,
+                min_samples_leaf,
+                min_weight_leaf,
+                random_state,
+                self.monotonic_cst_,
+                self.feature_combinations_,
+                self.feature_set_ends_,
+                self.n_feature_sets_,
+                self.max_features_per_set_,
+                self.cross_feature_set_sampling,
+            )
+
+        # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
+        if max_leaf_nodes < 0:
+            builder = DepthFirstTreeBuilder(
+                splitter,
+                min_samples_split,
+                min_samples_leaf,
+                min_weight_leaf,
+                max_depth,
+                self.min_impurity_decrease,
+                self.store_leaf_values,
+            )
+        else:
+            builder = BestFirstTreeBuilder(
+                splitter,
+                min_samples_split,
+                min_samples_leaf,
+                min_weight_leaf,
+                max_depth,
+                max_leaf_nodes,
+                self.min_impurity_decrease,
+                self.store_leaf_values,
+            )
+        builder.initialize_node_queue(self.tree_, X, y, sample_weight)
+        builder.build(self.tree_, X, y, sample_weight)
+
+        self._prune_tree()
+        return self
diff --git a/sktree/tree/tests/test_multiview.py b/sktree/tree/tests/test_multiview.py
index 409455caa..18a0e0a3e 100644
--- a/sktree/tree/tests/test_multiview.py
+++ b/sktree/tree/tests/test_multiview.py
@@ -182,7 +182,7 @@ def test_multiview_separate_feature_set_sampling_is_consistent():
     X = rng.standard_normal(size=(20, 10))
     y = rng.integers(0, 2, size=20)
 
-    # test with max_features as an array but apply_max_features is off
+    # test with max_features as an array
     clf = MultiViewDecisionTreeClassifier(
         random_state=seed,
         feature_set_ends=[1, 3, 6, 10],
@@ -195,7 +195,7 @@ def test_multiview_separate_feature_set_sampling_is_consistent():
     assert_array_equal(clf.max_features_per_set_, [1, 2, 2, 3])
     assert clf.max_features_ == np.sum(clf.max_features_per_set_), np.sum(clf.max_features_per_set_)
 
-    # test with max_features as an array but apply_max_features is off
+    # multiview feature set should be consistent across trees
     other_clf = MultiViewDecisionTreeClassifier(
         random_state=seed,
         feature_set_ends=[1, 3, 6, 10],

From 7aeb6a61cfa2c8bd91360c145e8904d41712ec68 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 16:14:44 -0400
Subject: [PATCH 18/26] Fix unit tests

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 benchmarks_nonasv/bench_forestht.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/benchmarks_nonasv/bench_forestht.py b/benchmarks_nonasv/bench_forestht.py
index 2bf0e6926..59e4dff9b 100644
--- a/benchmarks_nonasv/bench_forestht.py
+++ b/benchmarks_nonasv/bench_forestht.py
@@ -13,8 +13,6 @@
 import seaborn as sns
 from scipy.special import expit
 
-# using an outdated API, but the code could get refactored to use our new API
-# build_coleman_forest, build_oob_forest, etc.
 from sktree.stats import PermutationForestClassifier, PermutationForestRegressor
 
 seed = 12345

From 64edda39e7675626d3cbd871adc98a8086de1c06 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 16:19:11 -0400
Subject: [PATCH 19/26] Remove runtime checks in cython

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 sktree/tree/_oblique_splitter.pxd |  1 -
 sktree/tree/_oblique_splitter.pyx | 41 +++++++------------------------
 2 files changed, 9 insertions(+), 33 deletions(-)

diff --git a/sktree/tree/_oblique_splitter.pxd b/sktree/tree/_oblique_splitter.pxd
index aea477043..6ad086d0f 100644
--- a/sktree/tree/_oblique_splitter.pxd
+++ b/sktree/tree/_oblique_splitter.pxd
@@ -165,7 +165,6 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
 
 # XXX: This splitter is experimental. Expect changes frequently.
 cdef class MultiViewObliqueSplitter(MultiViewSplitter):
-    # cdef const intp_t[:] n_non_zeros_per_set  # the number of non-zero features in each feature set
     cdef intp_t _max_feature_combinations       # Number of non-zero features to sample per projection matrix
     cdef bint cross_feature_set_sampling        # Whether we sample across feature set when creating a projection vector
 
diff --git a/sktree/tree/_oblique_splitter.pyx b/sktree/tree/_oblique_splitter.pyx
index 7982c1de9..f21b7e1f5 100644
--- a/sktree/tree/_oblique_splitter.pyx
+++ b/sktree/tree/_oblique_splitter.pyx
@@ -1,8 +1,8 @@
 # distutils: language=c++
 # cython: language_level=3
-# cython: boundscheck=True
-# cython: wraparound=True
-# cython: initializedcheck=True
+# cython: boundscheck=False
+# cython: wraparound=False
+# cython: initializedcheck=False
 
 import numpy as np
 
@@ -813,8 +813,7 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
                 if proj_i >= self.max_features:
                     break
 
-# TODO: need to check segfault for multiview oblique splitter
-# REBUILD WITH BOUNDS CHECK
+
 cdef class MultiViewObliqueSplitter(MultiViewSplitter):
     def __cinit__(
         self,
@@ -839,14 +838,12 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
         # replaces usage of max_features
         self.max_features_per_set = max_features_per_set
 
-        # compute # of non-zeros expected on average per feature set
-        # cdef intp_t[:] n_non_zeros_per_set = np.zeros(self.n_feature_sets, dtype=np.intp)
-        # cdef intp_t i
-        # for i in range(self.n_feature_sets):
-        #     n_non_zeros_per_set[i] = <intp_t> (self.max_features_per_set[i] * self.feature_combinations)
-        # self.n_non_zeros_per_set = n_non_zeros_per_set
-
+        # each projection vector (i.e. mtry) of each feature set will sample a feature combination of
+        # 1 to "max feature combinations" number of features.
         self._max_feature_combinations = <intp_t> ceil(self.feature_combinations)
+
+        # with cross-feature-set sampling, the projection vector can combine different
+        # feature sets
         self.cross_feature_set_sampling = cross_feature_set_sampling
 
     def __reduce__(self):
@@ -876,26 +873,6 @@ cdef class MultiViewObliqueSplitter(MultiViewSplitter):
         Splitter.init(self, X, y, sample_weight, missing_values_in_feature_mask)
 
         self.X = X
-
-        # create a helper array for allowing efficient Fisher-Yates
-        self.multi_indices_to_sample = vector[vector[intp_t]](self.n_feature_sets)
-
-        # Here, we sample the indices of the features to sample in each feature set
-        # as a separate vector. This is done to allow for efficient Fisher-Yates
-        # shuffling of the indices, such that we randomly sample features to consider, but within
-        # each feature set separately. This ensures that the sampled projection matrix consists of
-        # a balanced number of features from each feature set.
-        #
-        # Example:
-        # multi_indices_to_sample[0] = [0, 1, 2, 3]
-        # multi_indices_to_sample[1] = [4, 5]
-        # which corresponds to a feature set with 4 features and another with 2 features.
-        # for i_feature in range(self.n_feature_sets):
-        #     size_of_feature_set = self.feature_set_ends[i_feature] - feature_set_begin
-        #     for ifeat in range(size_of_feature_set):
-        #         self.multi_indices_to_sample[i_feature].push_back(ifeat + feature_set_begin)
-        #     feature_set_begin = self.feature_set_ends[i_feature]
-
         return 0
 
     cdef void sample_proj_mat(

From 65b0f305476ab275cd9b56b7443ad61b69a691b6 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Fri, 5 Jul 2024 16:44:00 -0400
Subject: [PATCH 20/26] Fix docs

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 doc/whats_new/v0.9.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.9.rst b/doc/whats_new/v0.9.rst
index 6a7a4badf..41a920383 100644
--- a/doc/whats_new/v0.9.rst
+++ b/doc/whats_new/v0.9.rst
@@ -20,7 +20,7 @@ Changelog
     By `Adam Li`_ :pr:`#265`.
 
 - |Feature| :class:`sktree.tree.MultiViewObliqueDecisionTreeClassifier` is implemented
-    along with its forest version :class:`sktree.ensemble.MultiViewObliqueRandomForestClassifier`.
+    along with its forest version :class:`sktree.MultiViewObliqueRandomForestClassifier`.
     By `Adam Li`_ :pr:`#265`.
 
 Code and Documentation Contributors

From aafeb69af9add5e132a65b2c372a97dc06c5493c Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Tue, 9 Jul 2024 10:54:21 -0400
Subject: [PATCH 21/26] Removing

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 .gitignore                | 1 +
 treeple/_lib/sklearn_fork | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)
 delete mode 160000 treeple/_lib/sklearn_fork

diff --git a/.gitignore b/.gitignore
index f48bdcdb2..63dc5abe0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ coverage
 
 commit.txt
 treeple/_lib/sklearn/
+treeple/_lib/sklearn_fork/
 
 *.png
 _data
diff --git a/treeple/_lib/sklearn_fork b/treeple/_lib/sklearn_fork
deleted file mode 160000
index d455aa16e..000000000
--- a/treeple/_lib/sklearn_fork
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit d455aa16ee9cc42ce342dd07d9b94db117783fcc

From a003d5a23ab6fda325ca9f37e5c4c54e1a55fe01 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Tue, 9 Jul 2024 10:56:56 -0400
Subject: [PATCH 22/26] Fix

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 63dc5abe0..f48bdcdb2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,7 +12,6 @@ coverage
 
 commit.txt
 treeple/_lib/sklearn/
-treeple/_lib/sklearn_fork/
 
 *.png
 _data

From c1f9257dde1e0098f0b0f89a77d4346c5d5edbbe Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Tue, 9 Jul 2024 10:59:15 -0400
Subject: [PATCH 23/26] New submodule

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 treeple/_lib/sklearn_fork | 1 +
 1 file changed, 1 insertion(+)
 create mode 160000 treeple/_lib/sklearn_fork

diff --git a/treeple/_lib/sklearn_fork b/treeple/_lib/sklearn_fork
new file mode 160000
index 000000000..d455aa16e
--- /dev/null
+++ b/treeple/_lib/sklearn_fork
@@ -0,0 +1 @@
+Subproject commit d455aa16ee9cc42ce342dd07d9b94db117783fcc

From b0e0dffc2f77c4e52f319d07d68f8821a932ea2a Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Tue, 9 Jul 2024 11:32:44 -0400
Subject: [PATCH 24/26] Fix import

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 treeple/ensemble/_multiview.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/treeple/ensemble/_multiview.py b/treeple/ensemble/_multiview.py
index afc45ae86..aabfc2324 100644
--- a/treeple/ensemble/_multiview.py
+++ b/treeple/ensemble/_multiview.py
@@ -463,7 +463,7 @@ class MultiViewObliqueRandomForestClassifier(
 
     Attributes
     ----------
-    estimators_ : list of sktree.tree.ObliqueDecisionTreeClassifier
+    estimators_ : list of treeple.tree.ObliqueDecisionTreeClassifier
         The collection of fitted sub-estimators.
 
     classes_ : ndarray of shape (n_classes,) or a list of such arrays
@@ -512,7 +512,7 @@ class labels (multi-output problem).
 
     See Also
     --------
-    sktree.tree.ObliqueDecisionTreeClassifier : An oblique decision
+    treeple.tree.ObliqueDecisionTreeClassifier : An oblique decision
         tree classifier.
     sklearn.ensemble.RandomForestClassifier : An axis-aligned decision
         forest classifier.

From 392a729dddfbedc2699f5f9a472b1c0662c77ae1 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Tue, 9 Jul 2024 11:35:19 -0400
Subject: [PATCH 25/26] Fix import

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index faf27c35d..20c8c81b1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -83,5 +83,5 @@ repos:
           - tomli
         files: ^(?!doc/use\.rst$).*\.(rst|inc)$
 
-ci:
-  autofix_prs: true
+# ci:
+#   autofix_prs: true

From 347dddb3596c22999fdfd5c1a4baac9a2d6368a0 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Tue, 9 Jul 2024 11:35:36 -0400
Subject: [PATCH 26/26] Fix import

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 20c8c81b1..faf27c35d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -83,5 +83,5 @@ repos:
           - tomli
         files: ^(?!doc/use\.rst$).*\.(rst|inc)$
 
-# ci:
-#   autofix_prs: true
+ci:
+  autofix_prs: true