diff --git a/examples/manifold/README.txt b/examples/manifold/README.txt
new file mode 100644
index 000000000..8b1808911
--- /dev/null
+++ b/examples/manifold/README.txt
@@ -0,0 +1,6 @@
+.. _manifold_examples:
+
+Manifold learning with decision trees
+-------------------------------------
+
+Examples demonstrating manifold learning using random forest variants.
diff --git a/examples/manifold/plot_kernel_decision_tree.py b/examples/manifold/plot_kernel_decision_tree.py
new file mode 100644
index 000000000..35abb9740
--- /dev/null
+++ b/examples/manifold/plot_kernel_decision_tree.py
@@ -0,0 +1,106 @@
+"""
+======================================
+Custom Kernel Decision Tree Classifier
+======================================
+
+This example shows how to build a manifold oblique decision tree classifier using
+a custom, user-defined kernel/filter library, such as Gaussian or Gabor kernels.
+
+The example demonstrates the classifier on a 2D dataset of structured images:
+a downsampled version of MNIST, where each sample is a 28x28 image. Each image is
+downsampled to 14x14 and then flattened into a 196-dimensional vector. The dataset
+is then split into a training and a testing set.
+
+See :ref:`sphx_glr_auto_examples_plot_projection_matrices` for more information on
+projection matrices and the way they can be sampled.
+"""
+
+import matplotlib.pyplot as plt
+
+# %%
+# Importing the necessary modules
+from sklearn.datasets import fetch_openml
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+
+from treeple.tree import KernelDecisionTreeClassifier
+
+# %%
+# Load the Dataset
+# ----------------
+# We need to load the dataset and split it into training and testing sets.
+
+# Load the dataset as a NumPy array (``as_frame=False``) so that it can be reshaped
+X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
+
+# Downsample the dataset from 28x28 to 14x14 and flatten to 196 features
+X = X.reshape((-1, 28, 28))
+X = X[:, ::2, ::2]
+X = X.reshape((-1, 196))
+
+# Split the dataset into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# %%
+# Setting up the Custom Kernel Decision Tree Model
+# ------------------------------------------------
+# To set up the custom kernel decision tree model, we need to define the typical
+# hyperparameters of a decision tree classifier, such as the maximum depth of the
+# tree and the minimum number of samples required to split an internal node. For
+# the kernel decision tree model, we also need to specify the kernel family and
+# how many kernels to sample into the library.
+
+max_depth = 10
+min_samples_split = 2
+
+# Next, we define the hyperparameters for the custom kernels that we will use.
+# Here we sample a library of Gaussian kernels; ``n_kernels`` controls how many
+# kernels are drawn into the library.
+kernel = "gaussian"
+n_kernels = 500
+
+# We can then fit the custom kernel decision tree model to the training set:
+clf = KernelDecisionTreeClassifier(
+    max_depth=max_depth,
+    min_samples_split=min_samples_split,
+    data_dims=(14, 14),
+    min_patch_dims=(1, 1),
+    max_patch_dims=(14, 14),
+    dim_contiguous=(True, True),
+    boundary=None,
+    kernel=kernel,
+    n_kernels=n_kernels,
+    store_kernel_library=True,
+)
+
+# Fit the decision tree classifier using the custom kernel
+clf.fit(X_train, y_train)
+
+# %%
+# Evaluating the Custom Kernel Decision Tree Model
+# ------------------------------------------------
+# To evaluate the custom kernel decision tree model, we can use the testing set.
+# We can also inspect the important kernels that the tree selected. + +# Predict the labels for the testing set +y_pred = clf.predict(X_test) + +# Compute the accuracy score +accuracy = accuracy_score(y_test, y_pred) + +print(f"Kernel decision tree model obtained an accuracy of {accuracy} on MNIST.") + +# Get the important kernels from the decision tree classifier +important_kernels = clf.kernel_arr_ +kernel_dims = clf.kernel_dims_ +kernel_params = clf.kernel_params_ +kernel_library = clf.kernel_library_ + +# Plot the important kernels +fig, axes = plt.subplots( + nrows=len(important_kernels), ncols=1, figsize=(6, 4 * len(important_kernels)) +) +for i, kernel in enumerate(important_kernels): + axes[i].imshow(kernel, cmap="gray") + axes[i].set_title("Kernel {}".format(i + 1)) +plt.show() diff --git a/examples/splitters/plot_kernel_splitters.py b/examples/splitters/plot_kernel_splitters.py new file mode 100644 index 000000000..d95a388e6 --- /dev/null +++ b/examples/splitters/plot_kernel_splitters.py @@ -0,0 +1,77 @@ +""" +==================== +Random Kernel Splits +==================== + +This example shows how to build a manifold oblique decision tree classifier using +a custom set of user-defined kernel/filter library, such as the Gaussian, or Gabor +kernels. + +The example demonstrates superior performance on a 2D dataset with structured images +as samples. The dataset is the downsampled MNIST dataset, where each sample is a +28x28 image. The dataset is downsampled to 14x14, and then flattened to a 196 +dimensional vector. The dataset is then split into a training and testing set. + +See :ref:`sphx_glr_auto_examples_plot_projection_matrices` for more information on +projection matrices and the way they can be sampled. +""" + +import matplotlib.pyplot as plt +import numpy as np +from scipy.sparse import csr_matrix + +from treeple.tree.manifold._kernel_splitter import Kernel2D + +# %% +# Create a synthetic image +image_height, image_width = 50, 50 +image = np.random.rand(image_height, image_width).astype(np.float32) + +# Generate a Gaussian kernel (example) +kernel_size = 7 +x = np.linspace(-2, 2, kernel_size) +y = np.linspace(-2, 2, kernel_size) +x, y = np.meshgrid(x, y) +kernel = np.exp(-(x**2 + y**2)) +kernel = kernel / kernel.sum() # Normalize the kernel + +# Vectorize and create a sparse CSR matrix +kernel_vector = kernel.flatten().astype(np.float32) +kernel_indices = np.arange(kernel_vector.size) +kernel_indptr = np.array([0, kernel_vector.size]) +kernel_csr = csr_matrix( + (kernel_vector, kernel_indices, kernel_indptr), shape=(1, kernel_vector.size) +) + +# %% +# Initialize the Kernel2D class +kernel_sizes = np.array([kernel_size], dtype=np.intp) +random_state = np.random.RandomState(42) +print(kernel_csr.dtype, kernel_sizes.dtype, np.intp) +kernel_2d = Kernel2D(kernel_csr, kernel_sizes, random_state) + +# Apply the kernel to the image +result_value = kernel_2d.apply_kernel_py(image, 0) + +# %% +# Plot the original image, kernel, and result +fig, axs = plt.subplots(1, 3, figsize=(15, 5)) + +axs[0].imshow(image, cmap="gray") +axs[0].set_title("Original Image") + +axs[1].imshow(kernel, cmap="viridis") +axs[1].set_title("Gaussian Kernel") + +# Highlight the region where the kernel was applied +start_x, start_y = random_state.randint(0, image_width - kernel_size + 1), random_state.randint( + 0, image_height - kernel_size + 1 +) +image_with_kernel = image.copy() +image_with_kernel[start_y : start_y + kernel_size, start_x : start_x + kernel_size] *= kernel + 
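+# NOTE: the highlighted region is only illustrative. ``apply_kernel_py`` samples
+# its own patch location internally and does not expose it, so the region drawn
+# here is an assumed location, not the exact patch that was used. Conceptually,
+# the returned value is a single correlation of the kernel with one image patch;
+# a minimal NumPy sketch of that computation would be:
+#
+#     patch = image[start_y : start_y + kernel_size, start_x : start_x + kernel_size]
+#     approx_value = float((patch * kernel).sum())
+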
+axs[2].imshow(image_with_kernel, cmap="gray") +axs[2].set_title(f"Result: {result_value:.4f}") + +plt.tight_layout() +plt.show() diff --git a/pyproject.toml b/pyproject.toml index c0a50d95a..b3acf7f84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -195,6 +195,7 @@ ignore_missing_imports = true no_site_packages = true exclude = [ 'treeple/_lib/', + 'treeple/_lib/*', 'benchmarks_nonasv/' ] diff --git a/treeple/tree/__init__.py b/treeple/tree/__init__.py index 797338ac3..a7425c213 100644 --- a/treeple/tree/__init__.py +++ b/treeple/tree/__init__.py @@ -15,6 +15,7 @@ UnsupervisedObliqueDecisionTree, ) from ._honest_tree import HonestTreeClassifier +from ._kernel import KernelDecisionTreeClassifier from ._multiview import MultiViewDecisionTreeClassifier from ._neighbors import compute_forest_similarity_matrix @@ -34,4 +35,5 @@ "ExtraTreeClassifier", "ExtraTreeRegressor", "MultiViewDecisionTreeClassifier", + "KernelDecisionTreeClassifier", ] diff --git a/treeple/tree/_kernel.py b/treeple/tree/_kernel.py new file mode 100644 index 000000000..7b7347a3b --- /dev/null +++ b/treeple/tree/_kernel.py @@ -0,0 +1,595 @@ +import copy + +import numpy as np +from scipy.sparse import csr_matrix, issparse + +from .._lib.sklearn.tree._criterion import BaseCriterion +from .._lib.sklearn.tree._tree import BestFirstTreeBuilder, DepthFirstTreeBuilder +from ._classes import CRITERIA_CLF, PATCH_DENSE_SPLITTERS, PatchObliqueDecisionTreeClassifier +from ._oblique_tree import ObliqueTree + + +def gaussian_kernel(shape, sigma=1.0, mu=0.0): + """N-dimensional gaussian kernel for the given shape. + + See: https://gist.github.com/liob/e784775e882b83749cb3bbcef480576e + """ + m = np.meshgrid(*[np.linspace(-1, 1, s) for s in shape]) + d = np.sqrt(np.sum([x * x for x in m], axis=0)) + g = np.exp(-((d - mu) ** 2 / (2.0 * sigma**2))) + return g / np.sum(g) + + +def sample_gaussian_kernels(n_kernels, min_size, max_size, mu=0.0, sigma=1.0): + """ + Sample a set of Gaussian kernels and arrange them into a sparse kernel matrix. + + Parameters: + ----------- + n_kernels : int + The number of Gaussian kernels to sample. + min_size : int + The minimum size of the kernel (inclusive). + max_size : int + The maximum size of the kernel (inclusive). + mu : float or tuple of floats + The mean(s) of the Gaussian distribution. If a tuple, random mean values are sampled. + sigma : float or tuple of floats + The standard deviation(s) of the Gaussian distribution. If a tuple, random sigma values are sampled. + + Returns: + -------- + kernel_matrix : csr_matrix + The sparse matrix containing vectorized kernels. + kernel_params : dict of arrays + The parameters of the kernels that were sampled with keys: + - 'size': the size of each kernel; since the kernels are 2D square matrices + this is the side length of the kernel. 
+ - 'mu': the mean of each kernel + - 'sigma': the standard deviation + """ + data = [] + indices = [] + indptr = [0] + kernel_params = { + "size": np.zeros(n_kernels, dtype=np.intp), + "mu": np.zeros(n_kernels), + "sigma": np.zeros(n_kernels), + } + + for i in range(n_kernels): + # Sample the size of the kernel + size = np.random.randint(min_size, max_size + 1) + + # Sample mu and sigma if they are tuples + if isinstance(mu, tuple): + mu_sample = np.random.uniform(mu[0], mu[1]) + else: + mu_sample = mu + + if isinstance(sigma, tuple): + sigma_sample = np.random.uniform(sigma[0], sigma[1]) + else: + sigma_sample = sigma + + # Create a meshgrid for the kernel + x = np.linspace(-1, 1, size) + y = np.linspace(-1, 1, size) + X, Y = np.meshgrid(x, y) + + # Create the Gaussian kernel + kernel = np.exp(-((X - mu_sample) ** 2 + (Y - mu_sample) ** 2) / (2 * sigma_sample**2)) + + # Vectorize the kernel and store it in the sparse matrix format + kernel_vectorized = kernel.flatten() + data.extend(kernel_vectorized) + indices.extend(range(size * size)) + indptr.append(len(data)) + + # Store the kernel parameters + kernel_params["size"][i] = size + kernel_params["mu"][i] = mu_sample + kernel_params["sigma"][i] = sigma_sample + + # Convert lists to the appropriate format + data = np.array(data) + indices = np.array(indices) + indptr = np.array(indptr) + + # Create a sparse matrix + kernel_matrix = csr_matrix((data, indices, indptr), shape=(n_kernels, max_size * max_size)) + + return kernel_matrix, kernel_params + + +class KernelDecisionTreeClassifier(PatchObliqueDecisionTreeClassifier): + """Oblique decision tree classifier over data patches combined with Gaussian kernels. + + Parameters + ---------- + criterion : {"gini", "entropy"}, default="gini" + The function to measure the quality of a split. Supported criteria are + "gini" for the Gini impurity and "entropy" for the information gain. + splitter : {"best", "random"}, default="best" + The strategy used to choose the split at each node. Supported + strategies are "best" to choose the best split and "random" to choose + the best random split. + max_depth : int, default=None + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node: + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. + min_samples_leaf : int or float, default=1 + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + min_weight_fraction_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. 
+ max_features : int, float or {"auto", "sqrt", "log2"}, default=None + The number of features to consider when looking for the best split: + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a fraction and + `int(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + Note: Compared to axis-aligned Random Forests, one can set + max_features to a number greater then ``n_features``. + random_state : int, RandomState instance or None, default=None + Controls the randomness of the estimator. The features are always + randomly permuted at each split, even if ``splitter`` is set to + ``"best"``. When ``max_features < n_features``, the algorithm will + select ``max_features`` at random at each split before finding the best + split among them. But the best found split may vary across different + runs, even if ``max_features=n_features``. That is the case, if the + improvement of the criterion is identical for several splits and one + split has to be selected at random. To obtain a deterministic behaviour + during fitting, ``random_state`` has to be fixed to an integer. + See :term:`Glossary ` for details. + max_leaf_nodes : int, default=None + Grow a tree with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. + min_impurity_decrease : float, default=0.0 + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. + The weighted impurity decrease equation is the following:: + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + class_weight : dict, list of dict or "balanced", default=None + Weights associated with classes in the form ``{class_label: weight}``. + If None, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + For multi-output, the weights of each column of y will be multiplied. + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + min_patch_dim : array-like, optional + The minimum dimensions of a patch, by default 1 along all dimensions. 
+ max_patch_dim : array-like, optional + The maximum dimensions of a patch, by default 1 along all dimensions. + dim_contiguous : array-like of bool, optional + Whether or not each patch is sampled contiguously along this dimension. + data_dims : array-like, optional + The presumed dimensions of the un-vectorized feature vector, by default + will be a 1D vector with (1, n_features) shape. + boundary : optional, str {'wrap'} + The boundary condition to use when sampling patches, by default None. + 'wrap' corresponds to the boundary condition as is in numpy and scipy. + feature_weight : array-like of shape (n_samples,n_features,), default=None + Feature weights. If None, then features are equally weighted as is. + If provided, then the feature weights are used to weight the + patches that are generated. The feature weights are used + as follows: for every patch that is sampled, the feature weights over + the entire patch is summed and normalizes the patch. + kernel : str {'gaussian', 'uniform'}, default='gaussian' + The kernel to use. + n_kernels : int, optional + The number of different kernels to generate. This number should be very high + as this generates kernels + Attributes + ---------- + classes_ : ndarray of shape (n_classes,) or list of ndarray + The classes labels (single output problem), + or a list of arrays of class labels (multi-output problem). + feature_importances_ : ndarray of shape (n_features,) + The impurity-based feature importances. + The higher, the more important the feature. + The importance of a feature is computed as the (normalized) + total reduction of the criterion brought by that feature. It is also + known as the Gini importance [4]_. + Warning: impurity-based feature importances can be misleading for + high cardinality features (many unique values). See + :func:`sklearn.inspection.permutation_importance` as an alternative. + max_features_ : int + The inferred value of max_features. + n_classes_ : int or list of int + The number of classes (for single output problems), + or a list containing the number of classes for each + output (for multi-output problems). + n_features_in_ : int + Number of features seen during :term:`fit`. + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + n_outputs_ : int + The number of outputs when ``fit`` is performed. + tree_ : Tree instance + The underlying Tree object. Please refer to + ``help(sklearn.tree._tree.Tree)`` for + attributes of Tree object. + min_patch_dims_ : array-like + The minimum dimensions of a patch. + max_patch_dims_ : array-like + The maximum dimensions of a patch. + data_dims_ : array-like + The presumed dimensions of the un-vectorized feature vector. + kernel_arr_ : list of length (n_nodes,) of array-like of shape (patch_dims,) + The kernel array that is applied to the patches for this tree. + The order is in the same order in which the tree traverses the nodes. + kernel_params_ : list of length (n_nodes,) + The parameters of the kernel that is applied to the patches for this tree. + The order is in the same order in which the tree traverses the nodes. + kernel_library_ : array-like of shape (n_kernels,), optional + The library of kernels that was chosen from the patches for this tree. + Only stored if ``store_kernel_library`` is set to True. + kernel_library_params_ : list of length (n_nodes,), optional + The parameters of the kernels that was chosen from the patches for this tree. 
+ The order is in the same order in which the tree traverses the nodes. + Only stored if ``store_kernel_library`` is set to True. + References + ---------- + .. footbibliography:: + """ + + _parameter_constraints: dict = {**PatchObliqueDecisionTreeClassifier._parameter_constraints} + _parameter_constraints.update( + { + "kernel": ["str"], + "n_kernels": [int, None], + "store_kernel_library": [bool], + } + ) + + def __init__( + self, + *, + criterion="gini", + splitter="best", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features=None, + random_state=None, + max_leaf_nodes=None, + min_impurity_decrease=0.0, + class_weight=None, + min_patch_dims=None, + max_patch_dims=None, + dim_contiguous=None, + data_dims=None, + boundary=None, + feature_weight=None, + kernel="gaussian", + n_kernels=None, + store_kernel_library=False, + ): + super().__init__( + criterion=criterion, + splitter=splitter, + max_depth=max_depth, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + min_weight_fraction_leaf=min_weight_fraction_leaf, + max_features=max_features, + max_leaf_nodes=max_leaf_nodes, + class_weight=class_weight, + random_state=random_state, + min_impurity_decrease=min_impurity_decrease, + ) + + self.min_patch_dims = min_patch_dims + self.max_patch_dims = max_patch_dims + self.dim_contiguous = dim_contiguous + self.data_dims = data_dims + self.boundary = boundary + self.feature_weight = feature_weight + + self.kernel = kernel + self.n_kernel = n_kernels + self.store_kernel_library = store_kernel_library + + def _build_tree( + self, + X, + y, + sample_weight, + min_samples_leaf, + min_weight_leaf, + max_leaf_nodes, + min_samples_split, + max_depth, + random_state, + ): + """Build the actual tree. + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The training input samples. Internally, it will be converted to + ``dtype=np.float32`` and if a sparse matrix is provided + to a sparse ``csc_matrix``. + y : array-like of shape (n_samples,) or (n_samples, n_outputs) + The target values (class labels) as integers or strings. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. Splits + that would create child nodes with net zero or negative weight are + ignored while searching for a split in each node. Splits are also + ignored if they would result in any single class carrying a + negative weight in either child node. + min_samples_leaf : int or float + The minimum number of samples required to be at a leaf node. + min_weight_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights. + max_leaf_nodes : int, default=None + Grow a tree with ``max_leaf_nodes`` in best-first fashion. + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node: + max_depth : int, default=None + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + random_state : int, RandomState instance or None, default=None + Controls the randomness of the estimator. 
+ """ + # Build tree + criterion = self.criterion + if not isinstance(criterion, BaseCriterion): + criterion = CRITERIA_CLF[self.criterion](self.n_outputs_, self.n_classes_) + else: + # Make a deepcopy in case the criterion has mutable attributes that + # might be shared and modified concurrently during parallel fitting + criterion = copy.deepcopy(criterion) + + splitter = self.splitter + if issparse(X): + raise ValueError( + "Sparse input is not supported for oblique trees. " + "Please convert your data to a dense array." + ) + else: + PATCH_SPLITTERS = PATCH_DENSE_SPLITTERS + + # compute user-defined Kernel library before + kernel_library, kernel_dims, kernel_params = self._sample_kernel_library(X, y) + + # Note: users cannot define a splitter themselves + splitter = PATCH_SPLITTERS[self.splitter]( + criterion, + self.max_features_, + min_samples_leaf, + min_weight_leaf, + random_state, + self.min_patch_dims_, + self.max_patch_dims_, + self.dim_contiguous_, + self.data_dims_, + self.boundary, + self.feature_weight, + kernel_library, + ) + + self.tree_ = ObliqueTree(self.n_features_in_, self.n_classes_, self.n_outputs_) + + # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise + if max_leaf_nodes < 0: + builder = DepthFirstTreeBuilder( + splitter, + min_samples_split, + min_samples_leaf, + min_weight_leaf, + max_depth, + self.min_impurity_decrease, + ) + else: + builder = BestFirstTreeBuilder( + splitter, + min_samples_split, + min_samples_leaf, + min_weight_leaf, + max_depth, + max_leaf_nodes, + self.min_impurity_decrease, + ) + + builder.build(self.tree_, X, y, sample_weight) + + # Set some attributes based on what kernel indices were used in each tree + # - construct a tree-nodes like array and set it as a Python attribute, so it is + # exposed to the Python interface + # - use the Cython tree.feature array to store the index of the dictionary library + # that was used to split at each node + kernel_idx_chosen = self.tree_.feature + kernel_lib_chosen = kernel_library[kernel_idx_chosen] + kernel_params_chosen = kernel_params[kernel_idx_chosen] + self.kernel_arr_ = kernel_lib_chosen + self.kernel_dims_ = kernel_dims[kernel_idx_chosen] + self.kernel_params_ = kernel_params_chosen + + if self.store_kernel_library: + self.kernel_library_ = kernel_library + self.kernel_idx_chosen_ = kernel_idx_chosen + + if self.n_outputs_ == 1: + self.n_classes_ = self.n_classes_[0] + self.classes_ = self.classes_[0] + + def _sample_kernel_library(self, X, y, sample_weight): + """Samples the dictionary library that is sampled from in the random forest. + A kernel can either be applied with the boundaries of the image in mind, such that + the patch can be uniformly applied across all indices of rows of ``X_structured``. This + is equivalent to passing in ``boundary = 'wrap'``. Alternatively, a kernel can be sampled, + such that the patch is always contained within the boundary of the image. This is equivalent + to passing in ``boundary = None``. + Parameters + ---------- + X : _type_ + _description_ + y : _type_ + _description_ + sample_weight : _type_ + _description_ + Returns + ------- + kernel_library : array-like of shape (n_kernels, n_features) + The dictionary that will be sampled from the random forest. Non-zero entries indicate + where the kernel is applied. This is a n-dimensional kernel matrix that is vectorized + and placed into the original ``X`` data dimensions of (n_dims,). + kernel_dims : list of (n_kernels,) length + The dimensions of each kernel that is sampled. 
For example, (n_dims_x, n_dims_y) in
+            the first element indicates that the first kernel has a shape of (n_dims_x, n_dims_y).
+            This can be used to then place where the top-left seed of each kernel is applied.
+        kernel_params : list of (n_kernels,) length
+            A list of dictionaries representing the parameters of each kernel. This is used to
+            keep track of which kernels and parameterizations were valuable when used in the
+            random forest.
+        """
+        raise NotImplementedError("This method should be implemented in a child class.")
+
+
+class GaussianKernelDecisionTreeClassifier(KernelDecisionTreeClassifier):
+    _parameter_constraints = {
+        **KernelDecisionTreeClassifier._parameter_constraints,
+        # "mu_bounds": ['array-like'],
+        # "sigma_bounds": ['array-like'],
+    }
+
+    def __init__(
+        self,
+        *,
+        criterion="gini",
+        splitter="best",
+        max_depth=None,
+        min_samples_split=2,
+        min_samples_leaf=1,
+        min_weight_fraction_leaf=0,
+        max_features=None,
+        random_state=None,
+        max_leaf_nodes=None,
+        min_impurity_decrease=0,
+        class_weight=None,
+        min_patch_dims=None,
+        max_patch_dims=None,
+        dim_contiguous=None,
+        data_dims=None,
+        boundary=None,
+        feature_weight=None,
+        n_kernels=None,
+        store_kernel_library=False,
+        mu_bounds=(0, 1),
+        sigma_bounds=(0, 1),
+    ):
+        super().__init__(
+            criterion=criterion,
+            splitter=splitter,
+            max_depth=max_depth,
+            min_samples_split=min_samples_split,
+            min_samples_leaf=min_samples_leaf,
+            min_weight_fraction_leaf=min_weight_fraction_leaf,
+            max_features=max_features,
+            random_state=random_state,
+            max_leaf_nodes=max_leaf_nodes,
+            min_impurity_decrease=min_impurity_decrease,
+            class_weight=class_weight,
+            min_patch_dims=min_patch_dims,
+            max_patch_dims=max_patch_dims,
+            dim_contiguous=dim_contiguous,
+            data_dims=data_dims,
+            boundary=boundary,
+            feature_weight=feature_weight,
+            kernel="gaussian",
+            n_kernels=n_kernels,
+            store_kernel_library=store_kernel_library,
+        )
+        self.mu_bounds = mu_bounds
+        self.sigma_bounds = sigma_bounds
+
+    def _sample_kernel_library(self, X, y, sample_weight):
+        """Samples a Gaussian kernel library."""
+        rng = np.random.default_rng(self.random_state)
+        kernel_library = []
+        kernel_params = []
+        kernel_dims = []
+
+        # Sample the kernel library
+        ndim = len(self.data_dims_)
+        for _ in range(self.n_kernels):
+            patch_shape = []
+            for idx in range(ndim):
+                # Note: By constraining max patch height/width to be at least the min
+                # patch height/width this ensures that the minimum value of
+                # patch_height and patch_width is 1
+                patch_dim = rng.integers(self.min_patch_dims_[idx], self.max_patch_dims_[idx] + 1)
+
+                # sample the possible patch shape given input parameters
+                if self.boundary == "wrap":
+                    # add circular boundary conditions
+                    delta_patch_dim = self.data_dims_[idx] + 2 * (patch_dim - 1)
+
+                    # sample the top left index for this dimension
+                    top_left_patch_seed = rng.integers(0, delta_patch_dim)
+
+                    # resample the patch dimension due to padding
+                    dim = top_left_patch_seed % delta_patch_dim
+
+                    # resample the patch dimension due to padding
+                    patch_dim = min(
+                        patch_dim, min(dim + 1, self.data_dims_[idx] + patch_dim - dim - 1)
+                    )
+
+                patch_shape.append(patch_dim)
+
+            patch_shape = np.array(patch_shape)
+
+            # sample the sigma and mu parameters from their respective bounds
+            sigma = rng.uniform(low=self.sigma_bounds[0], high=self.sigma_bounds[1])
+            mu = rng.uniform(low=self.mu_bounds[0], high=self.mu_bounds[1])
+
+            kernel = gaussian_kernel(shape=patch_shape, sigma=sigma, mu=mu)
+
+            kernel_dims.append(kernel.shape)
+            kernel_library.append(kernel)
+            kernel_params.append({"shape":
patch_shape, "sigma": sigma, "mu": mu}) + + return kernel_library, kernel_dims, kernel_params diff --git a/treeple/tree/_oblique_splitter.pxd b/treeple/tree/_oblique_splitter.pxd index 65ca16e14..1f2805a2b 100644 --- a/treeple/tree/_oblique_splitter.pxd +++ b/treeple/tree/_oblique_splitter.pxd @@ -59,6 +59,12 @@ cdef class BaseObliqueSplitter(Splitter): vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil + cdef void sample_proj_vec( + self, + vector[float32_t]& proj_vec_weights, + vector[intp_t]& proj_vec_indices + ) noexcept nogil + # Redefined here since the new logic requires calling sample_proj_mat cdef int node_reset( self, diff --git a/treeple/tree/_oblique_splitter.pyx b/treeple/tree/_oblique_splitter.pyx index 0cceac664..a65ae19df 100644 --- a/treeple/tree/_oblique_splitter.pyx +++ b/treeple/tree/_oblique_splitter.pyx @@ -98,7 +98,7 @@ cdef class BaseObliqueSplitter(Splitter): const intp_t[:] samples, float32_t[:] feature_values, vector[float32_t]* proj_vec_weights, # weights of the vector (n_non_zeros,) - vector[intp_t]* proj_vec_indices # indices of the features (n_non_zeros,) + vector[intp_t]* proj_vec_indices # indices of the features (n_non_zeros,) ) noexcept nogil: """Compute the feature values for the samples[start:end] range. @@ -121,6 +121,12 @@ cdef class BaseObliqueSplitter(Splitter): feature_values[idx] = 0.0 feature_values[idx] += self.X[samples[idx], col_idx] * col_weight + cdef void sample_proj_vec( + self, + vector[float32_t]& proj_vec_weights, + vector[intp_t]& proj_vec_indices + ) noexcept nogil: + pass cdef class ObliqueSplitter(BaseObliqueSplitter): def __cinit__( @@ -202,6 +208,43 @@ cdef class ObliqueSplitter(BaseObliqueSplitter): # self.feature_weights = np.ones((self.n_features,), dtype=float32_t) / self.n_features return 0 + cdef void sample_proj_vec( + self, + vector[float32_t]& proj_vec_weights, + vector[intp_t]& proj_vec_indices + ) noexcept nogil: + cdef intp_t n_features = self.n_features + cdef intp_t n_non_zeros = self.n_non_zeros + cdef uint32_t* random_state = &self.rand_r_state + + cdef intp_t i, feat_i, proj_i, rand_vec_index + cdef float32_t weight + + # construct an array to sample from mTry x n_features set of indices + cdef intp_t[::1] indices_to_sample = self.indices_to_sample + cdef intp_t grid_size = self.max_features * self.n_features + + # shuffle indices over the 2D grid to sample using Fisher-Yates + fisher_yates_shuffle(indices_to_sample, grid_size, random_state) + + # sample 'n_non_zeros' in a mtry X n_features projection matrix + # which consists of +/- 1's chosen at a 1/2s rate + for i in range(0, n_non_zeros): + # get the next index from the shuffled index array + rand_vec_index = indices_to_sample[i] + + # get the projection index (i.e. row of the projection matrix) and + # feature index (i.e. 
column of the projection matrix) + proj_i = rand_vec_index // n_features + feat_i = rand_vec_index % n_features + + # sample a random weight + weight = 1 if (rand_int(0, 2, random_state) == 1) else -1 + + proj_vec_indices[proj_i].push_back(feat_i) # Store index of nonzero + proj_vec_weights[proj_i].push_back(weight) # Store weight of nonzero + + cdef void sample_proj_mat( self, vector[vector[float32_t]]& proj_mat_weights, @@ -322,6 +365,8 @@ cdef class BestObliqueSplitter(ObliqueSplitter): # XXX: 'feature' is not actually used in oblique split records # Just indicates which split was sampled current_split.feature = feat_i + + # sample the projection vector current_split.proj_vec_weights = &self.proj_mat_weights[feat_i] current_split.proj_vec_indices = &self.proj_mat_indices[feat_i] diff --git a/treeple/tree/_utils.pxd b/treeple/tree/_utils.pxd index ba2707791..8db48e8a1 100644 --- a/treeple/tree/_utils.pxd +++ b/treeple/tree/_utils.pxd @@ -6,6 +6,8 @@ cimport numpy as cnp cnp.import_array() +from libcpp.vector cimport vector + from .._lib.sklearn.tree._splitter cimport SplitRecord from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, int32_t, intp_t, uint32_t @@ -22,7 +24,7 @@ cdef void fisher_yates_shuffle( ) noexcept nogil -cdef int rand_weighted_binary( +cdef intp_t rand_weighted_binary( float64_t p0, uint32_t* random_state ) noexcept nogil @@ -47,3 +49,15 @@ cdef intp_t ravel_multi_index_cython( vector_or_memview coords, const intp_t[:] shape ) noexcept nogil + +cdef void compute_vectorized_indices_from_cartesian( + intp_t base_index, + vector[vector[intp_t]]& index_arrays, + const intp_t[:] data_dims, + vector[intp_t]& result +) noexcept nogil + +cdef memoryview[float32_t, ndim=3] init_2dmemoryview( + cnp.ndarray array, + const intp_t[:] data_dims +) diff --git a/treeple/tree/_utils.pyx b/treeple/tree/_utils.pyx index 7ce48977b..b85948c03 100644 --- a/treeple/tree/_utils.pyx +++ b/treeple/tree/_utils.pyx @@ -5,6 +5,8 @@ # cython: wraparound=False # cython: initializedcheck=False +from libcpp.vector cimport vector + import numpy as np cimport numpy as cnp @@ -20,6 +22,7 @@ cdef inline void fisher_yates_shuffle( uint32_t* random_state, ) noexcept nogil: """Shuffle the indices in place using the Fisher-Yates algorithm. + Parameters ---------- indices_to_sample : A C++ vector or 1D memoryview @@ -41,7 +44,7 @@ cdef inline void fisher_yates_shuffle( indices_to_sample[i], indices_to_sample[j] -cdef inline int rand_weighted_binary( +cdef inline intp_t rand_weighted_binary( float64_t p0, uint32_t* random_state ) noexcept nogil: @@ -61,7 +64,8 @@ cdef inline int rand_weighted_binary( else: return 1 -cpdef unravel_index( + +cdef inline void unravel_index_cython( intp_t index, cnp.ndarray[intp_t, ndim=1] shape ): @@ -150,7 +154,7 @@ cdef inline intp_t ravel_multi_index_cython( Parameters ---------- coords : intp_t[:] or vector[intp_t] - An array of coordinates to be converted and vectorized into a sinlg + An array of coordinates to be converted and vectorized into a single index. shape : numpy.ndarray[intp_t, ndim=1] The shape of the array into which the coordinates should be converted. 
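For intuition, the raveling helpers declared above follow NumPy's C-order (row-major) index arithmetic; a rough pure-Python sketch of the intended behaviour, using ``numpy`` and ``itertools`` as stand-ins for the Cython implementations, is:

import itertools

import numpy as np

data_dims = (5, 4)  # an assumed 2D image shape
# ravel_multi_index_cython: (row, col) -> flat index, row-major
assert np.ravel_multi_index((2, 3), data_dims) == 2 * 4 + 3  # == 11

# compute_vectorized_indices_from_cartesian: flat indices for every coordinate in
# the Cartesian product of per-dimension index arrays (e.g. a 2x2 patch)
index_arrays = [[1, 2], [0, 3]]
flat = [np.ravel_multi_index(c, data_dims) for c in itertools.product(*index_arrays)]
print(flat)  # [4, 7, 8, 11]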
@@ -180,3 +184,146 @@ cdef inline intp_t ravel_multi_index_cython( flat_index *= shape[i + 1] return flat_index + + +cdef inline void compute_vectorized_indices_from_cartesian( + intp_t base_index, + vector[vector[intp_t]]& index_arrays, + const intp_t[:] data_dims, + vector[intp_t]& result +) noexcept nogil: + """ + Compute the vectorized indices for all index sets from the Cartesian product of index arrays. + + :param base_index: The initial vectorized index (top-left point in the patch). + :param index_arrays: A list of lists, where each sublist contains the indices for that dimension. + :param data_dims: The dimensions of the data. + :param result: C++ vector to store the vectorized indices. + """ + cdef int n_dims = len(index_arrays) + cdef vector[intp_t] indices = vector[intp_t](n_dims) + cdef intp_t vectorized_index + cdef intp_t i + cdef intp_t n_products = 1 + + # Calculate the total number of Cartesian products + for i in range(n_dims): + n_products *= len(index_arrays[i]) + + # Initialize the current indices for each dimension + cdef vector[intp_t] current_indices = vector[intp_t](n_dims) + for i in range(n_dims): + current_indices[i] = 0 + + # Iterate over Cartesian products and compute vectorized indices + for _ in range(n_products): + # Compute the vectorized index for the current combination + for i in range(n_dims): + indices[i] = index_arrays[i][current_indices[i]] + vectorized_index = ravel_multi_index_cython( + indices, data_dims + ) + result.push_back(vectorized_index) + + # Move to the next combination + for i in range(n_dims - 1, -1, -1): + current_indices[i] += 1 + if current_indices[i] < len(index_arrays[i]): + break + current_indices[i] = 0 + if i == 0: + return # Finished all combinations + + +cdef vector[vector[intp_t]] cartesian_cython( + vector[vector[intp_t]] sequences +) noexcept nogil: + cdef vector[vector[intp_t]] results = vector[vector[intp_t]](1) + cdef vector[vector[intp_t]] next_results + for new_values in sequences: + for result in results: + for value in new_values: + result_copy = result + result_copy.push_back(value) + next_results.push_back(result_copy) + results = next_results + next_results.clear() + return results + + +cpdef cartesian_python(vector[vector[intp_t]]& sequences): + return cartesian_cython(sequences) + + +cdef memoryview[float32_t, ndim=3] init_2dmemoryview( + cnp.ndarray array, + const intp_t[:] data_dims +): + cdef intp_t n_samples = array.shape[0] + cdef intp_t ndim = data_dims.shape[0] + assert ndim == 2, "Currently only 2D images are accepted." + + # Reshape the array into (n_samples, *data_dims) + cdef cnp.ndarray reshaped_array = array.reshape((n_samples, data_dims[0], data_dims[1])) + + # Create a memoryview with the appropriate number of dimensions + cdef memoryview[float32_t, ndim=3] view = reshaped_array + return view + +#################################################################################################### +# CPDEF functions for testing purposes +#################################################################################################### + +cpdef unravel_index( + intp_t index, + cnp.ndarray[intp_t, ndim=1] shape +): + """Converts a flat index or array of flat indices into a tuple of coordinate arrays. + + Purely used for testing purposes. + + Parameters + ---------- + index : intp_t + A flat index. + shape : numpy.ndarray[intp_t, ndim=1] + The shape of the array into which the flat indices should be converted. 
+ + Returns + ------- + numpy.ndarray[intp_t, ndim=1] + A coordinate array having the same shape as the input `shape`. + """ + index = np.intp(index) + shape = np.array(shape, dtype=np.intp) + coords = np.empty(shape.shape[0], dtype=np.intp) + + cdef const intp_t[:] shape_memview = shape + cdef intp_t[:] coords_memview = coords + unravel_index_cython(index, shape_memview, coords_memview) + return coords + + +cpdef ravel_multi_index(intp_t[:] coords, const intp_t[:] shape): + """Converts a tuple of coordinate arrays into a flat index. + + Purely used for testing purposes. + + Parameters + ---------- + coords : numpy.ndarray[intp_t, ndim=1] + An array of coordinate arrays to be converted. + shape : numpy.ndarray[intp_t, ndim=1] + The shape of the array into which the coordinates should be converted. + + Returns + ------- + intp_t + The resulting flat index. + + Raises + ------ + ValueError + If the input `coords` have invalid indices. + """ + return ravel_multi_index_cython(coords, shape) diff --git a/treeple/tree/kernels.py b/treeple/tree/kernels.py deleted file mode 100644 index 6aab826a9..000000000 --- a/treeple/tree/kernels.py +++ /dev/null @@ -1,12 +0,0 @@ -import numpy as np - - -def gaussian_kernel(shape, sigma=1.0, mu=0.0): - """N-dimensional gaussian kernel for the given shape. - - See: https://gist.github.com/liob/e784775e882b83749cb3bbcef480576e - """ - m = np.meshgrid(*[np.linspace(-1, 1, s) for s in shape]) - d = np.sqrt(np.sum([x * x for x in m], axis=0)) - g = np.exp(-((d - mu) ** 2 / (2.0 * sigma**2))) - return g / np.sum(g) diff --git a/treeple/tree/manifold/_kernel_splitter.pxd b/treeple/tree/manifold/_kernel_splitter.pxd new file mode 100644 index 000000000..d0005d13c --- /dev/null +++ b/treeple/tree/manifold/_kernel_splitter.pxd @@ -0,0 +1,37 @@ +import numpy as np + +from libcpp.vector cimport vector + +from ..._lib.sklearn.tree._splitter cimport SplitRecord +from ..._lib.sklearn.utils._typedefs cimport ( + float32_t, + float64_t, + int32_t, + intp_t, + uint8_t, + uint32_t, +) +from .._oblique_splitter cimport BestObliqueSplitter, ObliqueSplitRecord +from ._morf_splitter cimport PatchSplitter + + +cdef class UserKernelSplitter(PatchSplitter): + """A class to hold user-specified kernels.""" + # cdef vector[float32_t[:, ::1]] kernel_dictionary # A list of C-contiguous 2D kernels + cdef vector[float32_t*] kernel_dictionary # A list of C-contiguous 2D kernels + cdef vector[intp_t*] kernel_dims # A list of arrays storing the dimensions of each kernel in `kernel_dictionary` + + +cdef class GaussianKernelSplitter(PatchSplitter): + """A class to hold Gaussian kernels. + + Overrides the weights that are generated to be sampled from a Gaussian distribution. 
+ See: https://www.tutorialspoint.com/gaussian-filter-generation-in-cplusplus + See: https://gist.github.com/thomasaarholt/267ec4fff40ca9dff1106490ea3b7567 + """ + + cdef void sample_proj_mat( + self, + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices + ) noexcept nogil diff --git a/treeple/tree/manifold/_kernel_splitter.pyx b/treeple/tree/manifold/_kernel_splitter.pyx new file mode 100644 index 000000000..ee8daf801 --- /dev/null +++ b/treeple/tree/manifold/_kernel_splitter.pyx @@ -0,0 +1,351 @@ +import numpy as np + +cimport numpy as cnp +from cython.operator cimport dereference as deref +from libcpp.vector cimport vector + +from ..._lib.sklearn.tree._criterion cimport Criterion +from ..._lib.sklearn.tree._utils cimport RAND_R_MAX, rand_int +from ..._lib.sklearn.utils._typedefs cimport float32_t, int32_t +from .._utils cimport ravel_multi_index_cython, unravel_index_cython + +ctypedef float32_t* float32_t_ptr +ctypedef intp_t* intp_t_ptr + + +cdef class Kernel2D: + cdef const intp_t[:] kernels_size + cdef const float32_t[:] kernel_data + cdef const int32_t[:] kernel_indices + cdef const int32_t[:] kernel_indptr + cdef uint32_t rand_r_state # sklearn_rand_r random number state + cdef object random_state # Random state + + cdef intp_t n_dims_largest + cdef intp_t n_kernels + + def __cinit__( + self, + object kernel_matrix, + cnp.ndarray kernel_sizes, + object random_state, + *argv + ): + """ + Initialize the class with a CSR sparse matrix and a 1D array of kernel sizes. + """ + self.kernels_size = kernel_sizes + self.kernel_data = kernel_matrix.data + self.kernel_indices = kernel_matrix.indices + self.kernel_indptr = kernel_matrix.indptr + self.n_kernels, self.n_dims_largest = kernel_matrix.shape + self.random_state = random_state + self.rand_r_state = self.random_state.randint(0, RAND_R_MAX) + + cdef inline float32_t apply_kernel( + self, + float32_t[:, ::1] image, + intp_t kernel_idx + ) noexcept nogil: + """ + Apply a kernel to a random point on a 2D image and return the resulting value. + + Parameters: + ----------- + image : double[:, :] + The 2D image array (image_height, image_width). + kernel_idx : intp_t + The index of the kernel to apply. + + Returns: + -------- + result : double + The resulting value after applying the kernel. 
+ """ + cdef uint32_t* random_state = &self.rand_r_state + cdef intp_t img_height = image.shape[0] + cdef intp_t img_width = image.shape[1] + cdef intp_t size = self.kernels_size[kernel_idx] + + # Ensure the kernel fits within the image dimensions + cdef intp_t max_x = img_width - 1 + cdef intp_t max_y = img_height - 1 + + # Sample a random top-left point to apply the kernel + cdef intp_t start_x = rand_int(0, max_x + 1, random_state) + cdef intp_t start_y = rand_int(0, max_y + 1, random_state) + + # Compute the result + cdef float32_t result = 0.0 + cdef intp_t idx, row, col + cdef float32_t image_value, kernel_value + + cdef intp_t start = self.kernel_indptr[kernel_idx] + cdef intp_t end = self.kernel_indptr[kernel_idx + 1] + + for idx in range(start, end): + row = self.kernel_indices[idx] // size + col = self.kernel_indices[idx] % size + + # Only process the kernel if it's within the image bounds + if image_value and kernel_value and (start_y + row <= max_y) and (start_x + col <= max_x): + image_value = image[start_y + row, start_x + col] + kernel_value = self.kernel_data[idx] + result += image_value * kernel_value + + return result + + def apply_kernel_py(self, float32_t[:, ::1] image, intp_t kernel_idx): + """ + Python-accessible wrapper for apply_kernel_nogil. + """ + return self.apply_kernel(image, kernel_idx) + + + + +# cdef class UserKernelSplitter(PatchSplitter): +# def __cinit__( +# self, +# criterion: Criterion, +# max_features: intp_t, +# min_samples_leaf: intp_t, +# min_weight_leaf: double, +# random_state: object, +# min_patch_dims: intp_t, +# max_patch_dims: intp_t, +# dim_contiguous: cnp.uint8_t, +# data_dims: intp_t, +# boundary: str, +# feature_weight: float32_t, +# kernel_dictionary: Kernel2D, +# *argv +# ): +# # initialize the kernel dictionary into a vector to allow Cython to see it +# # see: https://stackoverflow.com/questions/46240207/passing-list-of-numpy-arrays-to-c-using-cython +# cdef intp_t n_arrays = len(kernel_dictionary) +# self.kernel_dictionary = vector[float32_t_ptr](n_arrays) # A list of C-contiguous 2D kernels +# self.kernel_dims = vector[intp_t_ptr](n_arrays) # A list of arrays storing the dimensions of each kernel in `kernel_dictionary` + +# # buffers to point to each element in the list +# cdef float32_t[:] kernel +# cdef intp_t[:] kernel_dim + +# cdef intp_t i +# for i in range(n_arrays): +# kernel = kernel_dictionary[i] +# kernel_dim = kernel_dims[i] + +# # store a pointer to the data +# self.kernel_dictionary.push_back(&kernel[0]) +# self.kernel_dims.push_back(&kernel_dim[0]) + +# cdef inline void compute_features_over_samples( +# self, +# intp_t start, +# intp_t end, +# const intp_t[:] samples, +# float32_t[:] feature_values, +# vector[float32_t]* proj_vec_weights, # weights of the vector (max_features,) +# vector[intp_t]* proj_vec_indices # indices of the features (max_features,) +# ) noexcept nogil: +# """Compute the feature values for the samples[start:end] range. + +# Returns -1 in case of failure to allocate memory (and raise MemoryError) +# or 0 otherwise. +# """ +# cdef intp_t idx, jdx +# cdef intp_t col_idx +# cdef float32_t col_weight + +# # Compute linear combination of features and then +# # sort samples according to the feature values. 
+# for jdx in range(0, proj_vec_indices.size()): +# col_idx = deref(proj_vec_indices)[jdx] +# col_weight = deref(proj_vec_weights)[jdx] + +# for idx in range(start, end): +# # initialize the feature value to 0 +# if jdx == 0: +# feature_values[idx] = 0.0 +# feature_values[idx] += self.X[samples[idx], col_idx] * col_weight + + +# cdef void sample_proj_mat( +# self, +# vector[vector[float32_t]]& proj_mat_weights, +# vector[vector[intp_t]]& proj_mat_indices +# ) noexcept nogil: +# """Sample projection matrix using a contiguous patch. +# Randomly sample patches with weight of 1. +# """ +# cdef intp_t max_features = self.max_features +# cdef intp_t proj_i + +# # define parameters for vectorized points in the original data shape +# # and top-left seed +# cdef intp_t top_left_patch_seed + +# # size of the sampled patch, which is just the size of the n-dim patch +# # (\prod_i self.patch_dims_buff[i]) +# cdef intp_t patch_size + +# cdef float32_t[:] kernel +# cdef intp_t[:] kernel_dim + +# for proj_i in range(0, max_features): +# # now get the top-left seed that is used to then determine the top-left +# # position in patch +# # compute top-left seed for the multi-dimensional patch +# top_left_patch_seed, patch_size = self.sample_top_left_seed() + +# # sample a random index in the kernel library +# kernel_idx = rand_int(0, self.kernel_dictionary.size(), &self.rand_r_state) + +# # get that kernel and add it to the projection vector indices and weights +# kernel = self.kernel_dictionary[kernel_idx] +# kernel_dim = self.kernel_dims[kernel_idx] + +# # convert top-left-patch-seed to unraveled indices +# # get the top-left index in the original data +# top_left_idx = self.unravel_index(top_left_patch_seed, self.data_dims_buff, self.ndim) + +# # loop over the kernel and add the weights and indices to the projection +# for idim in range(self.ndim): +# # get the dimension of the kernel +# kernel_dim = self.kernel_dims[kernel_idx][idim] + +# # get the top-left index in the kernel +# top_left_kernel_idx = self.unravel_index(top_left_patch_seed, kernel_dim, self.ndim) + + # loop over the kernel and add the weights and indices to the projection + # for i in range(kernel_dim): + # # get the index in the original data + # idx = self.ravel_multi_index(top_left_idx, self.data_dims_buff, self.ndim) + + # # get the index in the kernel + # kernel_idx = self.ravel_multi_index(top_left_kernel_idx, kernel_dim, self.ndim) + + # # add the weight and index to the projection matrix + # proj_mat_weights[proj_i].push_back(kernel[kernel_idx]) + # proj_mat_indices[proj_i].push_back(idx) + + # # increment the top-left index in the original data + # top_left_idx[idim] += 1 + + # # increment the top-left index in the kernel + # top_left_kernel_idx[idim] += 1 + + # # increment the top-left index in the original data + # top_left_idx[idim] += self.patch_dims_buff[idim] - kernel_dim + + # # increment the top-left index in the kernel + # top_left_kernel_idx[idim] += self.patch_dims_buff[idim] - kernel_dim + + +# cdef class UserKernelSplitter(PatchSplitter): +# def __cinit__( +# self, +# criterion: Criterion, +# max_features: intp_t, +# min_samples_leaf: intp_t, +# min_weight_leaf: double, +# random_state: object, +# min_patch_dims: intp_t, +# max_patch_dims: intp_t, +# dim_contiguous: cnp.uint8_t, +# data_dims: intp_t, +# boundary: str, +# feature_weight: float32_t, +# kernel_dictionary: Kernel2D, +# *argv +# ): +# # initialize the kernel dictionary into a vector to allow Cython to see it +# # see: 
https://stackoverflow.com/questions/46240207/passing-list-of-numpy-arrays-to-c-using-cython +# cdef intp_t n_arrays = len(kernel_dictionary) +# self.kernel_dictionary = vector[float32_t_ptr](n_arrays) # A list of C-contiguous 2D kernels +# self.kernel_dims = vector[intp_t_ptr](n_arrays) # A list of arrays storing the dimensions of each kernel in `kernel_dictionary` + +# # buffers to point to each element in the list +# cdef float32_t[:] kernel +# cdef intp_t[:] kernel_dim + +# cdef intp_t i +# for i in range(n_arrays): +# kernel = kernel_dictionary[i] +# kernel_dim = kernel_dims[i] + +# # store a pointer to the data +# self.kernel_dictionary.push_back(&kernel[0]) +# self.kernel_dims.push_back(&kernel_dim[0]) + +# cdef void sample_proj_mat( +# self, +# vector[vector[float32_t]]& proj_mat_weights, +# vector[vector[intp_t]]& proj_mat_indices +# ) noexcept nogil: +# """Sample projection matrix using a contiguous patch. +# Randomly sample patches with weight of 1. +# """ +# cdef intp_t max_features = self.max_features +# cdef intp_t proj_i + +# # define parameters for vectorized points in the original data shape +# # and top-left seed +# cdef intp_t top_left_patch_seed + +# # size of the sampled patch, which is just the size of the n-dim patch +# # (\prod_i self.patch_dims_buff[i]) +# cdef intp_t patch_size + +# cdef float32_t[:] kernel +# cdef intp_t[:] kernel_dim + +# for proj_i in range(0, max_features): +# # now get the top-left seed that is used to then determine the top-left +# # position in patch +# # compute top-left seed for the multi-dimensional patch +# top_left_patch_seed, patch_size = self.sample_top_left_seed() + +# # sample a random index in the kernel library +# # kernel_idx = + +# # get that kernel and add it to the projection vector indices and weights +# kernel = self.kernel_dictionary[kernel_idx] +# kernel_dim = self.kernel_dims[kernel_idx] + +# # convert top-left-patch-seed to unraveled indices +# # get the top-left index in the original data +# top_left_idx = self.unravel_index(top_left_patch_seed, self.data_dims_buff, self.ndim) + +# # loop over the kernel and add the weights and indices to the projection +# for idim in range(self.ndim): +# # get the dimension of the kernel +# kernel_dim = self.kernel_dims[kernel_idx][idim] + +# # get the top-left index in the kernel +# top_left_kernel_idx = self.unravel_index(top_left_patch_seed, kernel_dim, self.ndim) + +# # loop over the kernel and add the weights and indices to the projection +# # for i in range(kernel_dim): +# # # get the index in the original data +# # idx = self.ravel_multi_index(top_left_idx, self.data_dims_buff, self.ndim) + +# # # get the index in the kernel +# # kernel_idx = self.ravel_multi_index(top_left_kernel_idx, kernel_dim, self.ndim) + +# # # add the weight and index to the projection matrix +# # proj_mat_weights[proj_i].push_back(kernel[kernel_idx]) +# # proj_mat_indices[proj_i].push_back(idx) + +# # # increment the top-left index in the original data +# # top_left_idx[idim] += 1 + +# # # increment the top-left index in the kernel +# # top_left_kernel_idx[idim] += 1 + +# # # increment the top-left index in the original data +# # top_left_idx[idim] += self.patch_dims_buff[idim] - kernel_dim + +# # # increment the top-left index in the kernel +# # top_left_kernel_idx[idim] += self.patch_dims_buff[idim] - kernel_dim diff --git a/treeple/tree/manifold/_morf_splitter.pxd b/treeple/tree/manifold/_morf_splitter.pxd index 2b65fd3ba..11e5ef9c3 100644 --- a/treeple/tree/manifold/_morf_splitter.pxd +++ 
b/treeple/tree/manifold/_morf_splitter.pxd @@ -33,11 +33,14 @@ cdef class PatchSplitter(BestObliqueSplitter): # `data_width` are used to determine the vectorized indices corresponding to # (x,y) coordinates in the original un-vectorized data. cdef public intp_t ndim # The number of dimensions of the input data + cdef const intp_t[:] data_dims # The dimensions of the input data + cdef const intp_t[:] min_patch_dims # The minimum size of the patch to sample in each dimension + cdef const intp_t[:] max_patch_dims # The maximum size of the patch to sample in each dimension + cdef const uint8_t[:] dim_contiguous # A boolean array indicating whether each dimension is contiguous - cdef const intp_t[:] data_dims # The dimensions of the input data - cdef const intp_t[:] min_patch_dims # The minimum size of the patch to sample in each dimension - cdef const intp_t[:] max_patch_dims # The maximum size of the patch to sample in each dimension - cdef const uint8_t[:] dim_contiguous # A boolean array indicating whether each dimension is contiguous + # TODO: assumes all oblique splitters only work with dense data + cdef public memoryview X_reshaped + cdef memoryview patch_nd_indices # TODO: check if this works and is necessary for discontiguous data # cdef intp_t[:] stride_offsets # The stride offsets for each dimension @@ -48,13 +51,13 @@ cdef class PatchSplitter(BestObliqueSplitter): cdef intp_t[::1] _index_data_buffer cdef intp_t[::1] _index_patch_buffer - cdef intp_t[:] patch_sampled_size # A buffer to store the dimensions of the sampled patch + cdef intp_t[:] patch_sampled_size # A buffer to store the dimensions of the sampled patch cdef intp_t[:] unraveled_patch_point # A buffer to store the unraveled patch point # All oblique splitters (i.e. non-axis aligned splitters) require a # function to sample a projection matrix that is applied to the feature matrix # to quickly obtain the sampled projections for candidate splits. - cdef (intp_t, intp_t) sample_top_left_seed( + cdef intp_t sample_top_left_seed( self ) noexcept nogil @@ -63,23 +66,3 @@ cdef class PatchSplitter(BestObliqueSplitter): vector[vector[float32_t]]& proj_mat_weights, vector[vector[intp_t]]& proj_mat_indices ) noexcept nogil - - -# cdef class UserKernelSplitter(PatchSplitter): -# """A class to hold user-specified kernels.""" -# cdef vector[float32_t[:, ::1]] kernel_dictionary # A list of C-contiguous 2D kernels - - -cdef class GaussianKernelSplitter(PatchSplitter): - """A class to hold Gaussian kernels. - - Overrides the weights that are generated to be sampled from a Gaussian distribution. 
- See: https://www.tutorialspoint.com/gaussian-filter-generation-in-cplusplus - See: https://gist.github.com/thomasaarholt/267ec4fff40ca9dff1106490ea3b7567 - """ - - cdef void sample_proj_mat( - self, - vector[vector[float32_t]]& proj_mat_weights, - vector[vector[intp_t]]& proj_mat_indices - ) noexcept nogil diff --git a/treeple/tree/manifold/_morf_splitter.pyx b/treeple/tree/manifold/_morf_splitter.pyx index f1eaf2918..dc4dffdf5 100644 --- a/treeple/tree/manifold/_morf_splitter.pyx +++ b/treeple/tree/manifold/_morf_splitter.pyx @@ -5,14 +5,40 @@ # cython: wraparound=False # cython: initializedcheck=False +cimport numpy as cnp + import numpy as np from cython.operator cimport dereference as deref from libcpp.vector cimport vector from ..._lib.sklearn.tree._criterion cimport Criterion +from ..._lib.sklearn.tree._tree cimport ParentInfo from ..._lib.sklearn.tree._utils cimport rand_int -from .._utils cimport ravel_multi_index_cython, unravel_index_cython +from .._sklearn_splitter cimport sort +from .._utils cimport ( + compute_vectorized_indices_from_cartesian, + fisher_yates_shuffle, + init_2dmemoryview, + ravel_multi_index_cython, + unravel_index_cython, +) + + +cdef float64_t INFINITY = np.inf + +# Mitigate precision differences between 32 bit and 64 bit +cdef float32_t FEATURE_THRESHOLD = 1e-7 + +cdef inline void _init_split(ObliqueSplitRecord* self, intp_t start_pos) noexcept nogil: + self.impurity_left = INFINITY + self.impurity_right = INFINITY + self.pos = start_pos + self.feature = 0 + self.threshold = 0. + self.improvement = -INFINITY + self.missing_go_to_left = False + self.n_missing = 0 cdef class PatchSplitter(BestObliqueSplitter): @@ -33,46 +59,11 @@ cdef class PatchSplitter(BestObliqueSplitter): const float64_t[:] sample_weight, const uint8_t[::1] missing_values_in_feature_mask, ) except -1: + # store a view to the (n_samples, n_features) dataset BestObliqueSplitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) - return 0 - - cdef int node_reset( - self, - intp_t start, - intp_t end, - float64_t* weighted_n_node_samples - ) except -1 nogil: - """Reset splitter on node samples[start:end]. - - Returns -1 in case of failure to allocate memory (and raise MemoryError) - or 0 otherwise. 
- - Parameters - ---------- - start : intp_t - The index of the first sample to consider - end : intp_t - The index of the last sample to consider - weighted_n_node_samples : ndarray, dtype=float64_t pointer - The total weight of those samples - """ - - self.start = start - self.end = end - - self.criterion.init(self.y, - self.sample_weight, - self.weighted_n_samples, - self.samples) - self.criterion.set_sample_pointers(start, end) - - weighted_n_node_samples[0] = self.criterion.weighted_n_node_samples - - # Clear all projection vectors - for i in range(self.max_features): - self.proj_mat_weights[i].clear() - self.proj_mat_indices[i].clear() + # store a reshaped view of (n_samples, height, width) dataset + self.X_reshaped = init_2dmemoryview(X, self.data_dims) return 0 cdef void sample_proj_mat( @@ -87,32 +78,11 @@ cdef class PatchSplitter(BestObliqueSplitter): """ pass - cdef (intp_t, intp_t) sample_top_left_seed(self) noexcept nogil: + cdef intp_t sample_top_left_seed(self) noexcept nogil: pass -cdef class BaseDensePatchSplitter(PatchSplitter): - cdef int init( - self, - object X, - const float64_t[:, ::1] y, - const float64_t[:] sample_weight, - const uint8_t[::1] missing_values_in_feature_mask, - # const int32_t[:] n_categories - ) except -1: - """Initialize the splitter - - Returns -1 in case of failure to allocate memory (and raise MemoryError) - or 0 otherwise. - """ - # Call parent init - PatchSplitter.init(self, X, y, sample_weight, missing_values_in_feature_mask) - - self.X = X - return 0 - - -cdef class BestPatchSplitter(BaseDensePatchSplitter): +cdef class BestPatchSplitter(PatchSplitter): def __cinit__( self, Criterion criterion, @@ -159,6 +129,19 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): self.max_patch_dims = max_patch_dims self.dim_contiguous = dim_contiguous + # create a memoryview to store the n-dimensional indices + self.patch_nd_indices = np.zeros(self.max_patch_dims[:], dtype=np.intp) + + # store random indices for discontiguous sampling if necessary + # these are initialized here to allow fisher-yates shuffling without having + # to re-allocate memory + self.random_indices = vector[vector[intp_t]](self.ndim) + for idx in range(self.ndim): + if not self.dim_contiguous[idx]: + self.random_indices[idx].reserve(self.max_patch_dims[idx]) + for jdx in range(self.max_patch_dims[idx]): + self.random_indices[idx].push_back(jdx) + # initialize a buffer to allow for Fisher-Yates self._index_patch_buffer = np.zeros(np.max(self.max_patch_dims), dtype=np.intp) self._index_data_buffer = np.zeros(np.max(self.data_dims), dtype=np.intp) @@ -192,31 +175,178 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): self.feature_weight.base if self.feature_weight is not None else None, ), self.__getstate__()) - cdef (intp_t, intp_t) sample_top_left_seed(self) noexcept nogil: - """Sample the top-left seed for the n-dim patch. + cdef int node_split( + self, + ParentInfo* parent_record, + SplitRecord* split, + ) except -1 nogil: + """Find the best_split split on node samples[start:end] + + Returns -1 in case of failure to allocate memory (and raise MemoryError) + or 0 otherwise. 
+ """ + # typecast the pointer to an ObliqueSplitRecord + cdef ObliqueSplitRecord* oblique_split = (split) + + # Draw random splits and pick the best_split + cdef intp_t[::1] samples = self.samples + cdef intp_t start = self.start + cdef intp_t end = self.end + + # pointer array to store feature values to split on + cdef float32_t[::1] feature_values = self.feature_values + cdef intp_t max_features = self.max_features + cdef intp_t min_samples_leaf = self.min_samples_leaf + + # keep track of split record for current_split node and the best_split split + # found among the sampled projection vectors + cdef ObliqueSplitRecord best_split, current_split + cdef float64_t current_proxy_improvement = -INFINITY + cdef float64_t best_proxy_improvement = -INFINITY + + cdef float64_t impurity = parent_record.impurity + + cdef intp_t feat_i, p # index over computed features and start/end + cdef intp_t partition_end + cdef float32_t temp_d # to compute a projection feature value + + # instantiate the split records + _init_split(&best_split, end) + + # For every vector in the projection matrix + for feat_i in range(max_features): + # Projection vector has no nonzeros + if self.proj_mat_weights[feat_i].empty(): + continue + + # XXX: 'feature' is not actually used in oblique split records + # Just indicates which split was sampled + current_split.feature = feat_i + + # sample the projection vector + self.sample_proj_vec(self.proj_mat_weights[feat_i], self.proj_mat_indices[feat_i]) + current_split.proj_vec_weights = &self.proj_mat_weights[feat_i] + current_split.proj_vec_indices = &self.proj_mat_indices[feat_i] + + # Compute linear combination of features and then + # sort samples according to the feature values. + self.compute_features_over_samples( + start, + end, + samples, + feature_values, + &self.proj_mat_weights[feat_i], + &self.proj_mat_indices[feat_i] + ) + + # Sort the samples + sort(&feature_values[start], &samples[start], end - start) + + # Evaluate all splits + self.criterion.reset() + p = start + while p < end: + while (p + 1 < end and feature_values[p + 1] <= feature_values[p] + FEATURE_THRESHOLD): + p += 1 + + p += 1 + + if p < end: + current_split.pos = p + + # Reject if min_samples_leaf is not guaranteed + if (((current_split.pos - start) < min_samples_leaf) or + ((end - current_split.pos) < min_samples_leaf)): + continue + + self.criterion.update(current_split.pos) + # Reject if min_weight_leaf is not satisfied + if self.check_postsplit_conditions() == 1: + continue + + current_proxy_improvement = \ + self.criterion.proxy_impurity_improvement() + + if current_proxy_improvement > best_proxy_improvement: + best_proxy_improvement = current_proxy_improvement + # sum of halves is used to avoid infinite value + current_split.threshold = feature_values[p - 1] / 2.0 + feature_values[p] / 2.0 + + if ( + (current_split.threshold == feature_values[p]) or + (current_split.threshold == INFINITY) or + (current_split.threshold == -INFINITY) + ): + current_split.threshold = feature_values[p - 1] + + best_split = current_split # copy + + # Reorganize into samples[start:best_split.pos] + samples[best_split.pos:end] + if best_split.pos < end: + partition_end = end + p = start + + while p < partition_end: + # Account for projection vector + temp_d = 0.0 + for j in range(best_split.proj_vec_indices.size()): + temp_d += self.X[samples[p], deref(best_split.proj_vec_indices)[j]] *\ + deref(best_split.proj_vec_weights)[j] + + if temp_d <= best_split.threshold: + p += 1 + + else: + partition_end -= 1 + samples[p], 
samples[partition_end] = \ + samples[partition_end], samples[p] + + self.criterion.reset() + self.criterion.update(best_split.pos) + self.criterion.children_impurity(&best_split.impurity_left, + &best_split.impurity_right) + best_split.improvement = self.criterion.impurity_improvement( + impurity, best_split.impurity_left, best_split.impurity_right) + + # Return values + deref(oblique_split).proj_vec_indices = best_split.proj_vec_indices + deref(oblique_split).proj_vec_weights = best_split.proj_vec_weights + deref(oblique_split).feature = best_split.feature + deref(oblique_split).pos = best_split.pos + deref(oblique_split).threshold = best_split.threshold + deref(oblique_split).improvement = best_split.improvement + deref(oblique_split).impurity_left = best_split.impurity_left + deref(oblique_split).impurity_right = best_split.impurity_right + + # XXX: Fix when we can track constants + parent_record.n_constant_features = 0 + return 0 + + cdef inline intp_t sample_top_left_seed( + self + ) noexcept nogil: + """Sample the top-left seed, and patch size for the n-dim patch. Returns ------- top_left_seed : intp_t The top-left seed vectorized (i.e. raveled) for the n-dim patch. - patch_size : intp_t - The total size of the n-dim patch (i.e. the volume). """ # now get the top-left seed that is used to then determine the top-left # position in patch # compute top-left seed for the multi-dimensional patch cdef intp_t top_left_patch_seed - cdef intp_t patch_size = 1 cdef uint32_t* random_state = &self.rand_r_state # define parameters for the random patch cdef intp_t patch_dim cdef intp_t delta_patch_dim - cdef intp_t dim - cdef intp_t idx + + # create a vector to store the unraveled patch top left seed point + cdef vector[intp_t] unraveled_patch_point = vector[intp_t](self.ndim) for idx in range(self.ndim): # compute random patch width and height @@ -238,7 +368,6 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): # write to buffer self.patch_sampled_size[idx] = patch_dim - patch_size *= patch_dim elif self.boundary == "wrap": # add circular boundary conditions delta_patch_dim = self.data_dims[idx] + 2 * (patch_dim - 1) @@ -252,19 +381,93 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): # resample the patch dimension due to padding patch_dim = min(patch_dim, min(dim+1, self.data_dims[idx] + patch_dim - dim - 1)) self.patch_sampled_size[idx] = patch_dim - patch_size *= patch_dim # TODO: make this work # Convert the top-left-seed value to it's appropriate index in the full image. top_left_patch_seed = max(0, dim - patch_dim + 1) + # unraveled_patch_point[idx] = top_left_patch_seed self.unraveled_patch_point[idx] = top_left_patch_seed - + + # top_left_patch_seed = ravel_multi_index_cython( + # unraveled_patch_point, + # self.data_dims + # ) top_left_patch_seed = ravel_multi_index_cython( self.unraveled_patch_point, self.data_dims ) - return top_left_patch_seed, patch_size + return top_left_patch_seed + + + cdef inline intp_t sample_top_left_seed_v2( + self + ) noexcept nogil: + """Sample the top-left seed, and patch size for the n-dim patch. + + Returns + ------- + top_left_seed : intp_t + The top-left seed vectorized (i.e. raveled) for the n-dim patch. 
+ """ + # the top-left position in the patch for each dimension + cdef intp_t top_left_patch_seed + + cdef uint32_t* random_state = &self.rand_r_state + + # define parameters for the random patch + cdef intp_t patch_dim + cdef intp_t delta_patch_dim + cdef intp_t dim + + # pre-allocated buffer to store the unraveled patch top left seed point + cdef intp_t[:] unraveled_patch_point = self.unraveled_patch_point + + for dim in range(self.ndim): + # compute random patch width and height + # Note: By constraining max patch height/width to be at least the min + # patch height/width this ensures that the minimum value of + # patch_height and patch_width is 1 + patch_dim = rand_int( + self.min_patch_dims[dim], + self.max_patch_dims[dim] + 1, + random_state + ) + + if not self.dim_contiguous[dim]: + # fisher-yates shuffle the discontiguous dimension, so we can randomly sample + # without replacement, the indices of the patch in that dimension + fisher_yates_shuffle(self.random_indices[dim], self.random_indices[dim].size(), random_state) + + # sample the top-left index and patch size for this dimension based on boundary effects + if self.boundary is None: + # compute the difference between the image dimensions and the current + # random patch dimensions for sampling + delta_patch_dim = (self.data_dims[dim] - patch_dim) + 1 + top_left_patch_seed = rand_int(0, delta_patch_dim, random_state) + + # write to buffer + self.patch_sampled_size[dim] = patch_dim + elif self.boundary == "wrap": + pass + + + # now add the relevant indices for each element of the patch + for jdx in range(patch_dim): + if self.dim_contiguous[dim]: + self.patch_nd_indices[dim][jdx] = top_left_patch_seed + jdx + else: + # if discontiguous, we will perform random sampling + self.patch_nd_indices[dim][jdx] = self.random_indices[dim][jdx] + + unraveled_patch_point[dim] = top_left_patch_seed + + # get the vectorized index of the top-left seed + top_left_patch_seed = ravel_multi_index_cython( + unraveled_patch_point, + self.data_dims + ) + return top_left_patch_seed cdef void sample_proj_mat( self, @@ -282,22 +485,17 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): # and top-left seed cdef intp_t top_left_patch_seed - # size of the sampled patch, which is just the size of the n-dim patch - # (\prod_i self.patch_sampled_size[i]) - cdef intp_t patch_size - for proj_i in range(0, max_features): # now get the top-left seed that is used to then determine the top-left # position in patch # compute top-left seed for the multi-dimensional patch - top_left_patch_seed, patch_size = self.sample_top_left_seed() + top_left_patch_seed = self.sample_top_left_seed() # sample a projection vector given the top-left seed point in n-dimensional space self.sample_proj_vec( proj_mat_weights, proj_mat_indices, proj_i, - patch_size, top_left_patch_seed, self.patch_sampled_size ) @@ -307,7 +505,6 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): vector[vector[float32_t]]& proj_mat_weights, vector[vector[intp_t]]& proj_mat_indices, intp_t proj_i, - intp_t patch_size, intp_t top_left_patch_seed, const intp_t[:] patch_dims, ) noexcept nogil: @@ -315,6 +512,14 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): # iterates over the size of the patch cdef intp_t patch_idx + cdef intp_t i + + # size of the sampled patch, which is just the size of the n-dim patch + # (\prod_i self.patch_sampled_size[i]) + cdef intp_t patch_size = 1 + for i in range(0, self.ndim): + patch_size *= patch_dims[i] + # stores how many patches we have iterated so far cdef 
intp_t vectorized_patch_offset cdef intp_t vectorized_point_offset @@ -330,7 +535,6 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): cdef intp_t other_dims_offset cdef intp_t row_index - cdef intp_t i cdef intp_t num_rows = self.data_dims[0] if self._discontiguous: # fill with values 0, 1, ..., dimension - 1 @@ -401,6 +605,166 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): proj_mat_indices[proj_i].push_back(vectorized_point) proj_mat_weights[proj_i].push_back(weight) + cdef void sample_proj_vec_v2( + self, + vector[vector[float32_t]]& proj_mat_weights, + vector[vector[intp_t]]& proj_mat_indices, + vector[intp_t]& top_left_seed, + intp_t proj_i, + const intp_t[:] patch_dims, + ) noexcept nogil: + cdef uint32_t* random_state = &self.rand_r_state + + # iterates over the size of the patch + cdef intp_t patch_idx + + # stores how many patches we have iterated so far + cdef intp_t vectorized_patch_offset + cdef intp_t vectorized_point_offset + cdef intp_t vectorized_point + + cdef intp_t dim_idx + + # weights are default to 1 + cdef float32_t weight = 1. + + # XXX: still unsure if it works yet + # XXX: THIS ONLY WORKS FOR THE FIRST DIMENSION THAT IS DISCONTIGUOUS. + cdef intp_t other_dims_offset + cdef intp_t row_index + + cdef intp_t i + cdef intp_t num_rows = self.data_dims[0] + + # iterate over the patch and store the vectorized index + cdef vector[intp_t] index_arr = vector[intp_t](self.ndim) + for dim in range(top_left_seed.size()): + index_arr[dim] = top_left_seed[dim] + + # fisher_yates_shuffle(dim_index_arr, patch_dims[dim], random_state) + + # iterate over the indices of the patch, and get the resulting + # raveled index at each patch-point, resulting in now a 1D vector + # of indices representing each patch-point. + while True: + # Note: we iterate over the last axis first assuming that data + # is stored in a C-contiguous array. + for dim in range(self.ndim - 1, -1, -1): + if self.dim_contiguous[dim]: + index_arr[dim] += 1 + + # check we have not reached the boundaries of the patch. 
If we have not, + # we can break out of the loop and continue iterating in this dimension + if index_arr[dim] < top_left_seed[dim] + patch_dims[dim]: + break + + # if we have reached the boundary, we reset the index and continue + index_arr[dim] = top_left_seed[dim] + else: + random_index = random_indices[dim][r_idx] + # dimension is assumed discontiguous, and thus is randomly chosen + index_arr[dim] = rand_int(0, self.data_dims[dim], random_state) + + discontiguous_index += 1 + + if r_idx < patch_dims[dim]: + idx[dim] = random_indices[dim][r_idx] + r_idx += 1 + break + + r_idx = 0 + # if we have reached the boundary, we reset the index and continue + index_arr[dim] = top_left_seed[dim] + + # if self.dim_contiguous[dim] and + # break + # elif not self.dim_contiguous[dim] and index > patch_dims[dim]: + # break + else: + break + + # # get the vectorized version of this index + # # dim_idx = ravel_multi_index_cython( + # # index_arr, + # # self.data_dims + # # ) + + # proj_mat_indices[proj_i].push_back(vectorized_point) + # proj_mat_weights[proj_i].push_back(weight) + + # idx[dim] = start[dim] + # TODO: iterate correctly over multi-dim array + # for idx in range(len(self.data_dims[dim])): + # unvectorized_idx[dim] = top_left_seed[dim] + idx + # else: + # unvectorized_idx[dim] = top_left_seed[dim] + rand_int(0, patch_dims[dim], random_state) + + # # get the vectorized version of this index + # dim_idx = ravel_multi_index_cython( + # unvectorized_idx, + # self.data_dims + # ) + + # proj_mat_indices[proj_i].push_back(vectorized_point) + # proj_mat_weights[proj_i].push_back(weight) + + + # for patch_idx in range(patch_size): + # # keep track of which dimensions of the patch we have iterated over + # vectorized_patch_offset = 1 + + # # Once the vectorized top-left-seed is unraveled, you can add the patch + # # points in the array structure and compute their vectorized (unraveled) + # # points, which are added to the projection vector + # unravel_index_cython(top_left_patch_seed, self.data_dims, self.unraveled_patch_point) + + # for dim_idx in range(self.ndim): + # # compute the offset from the top-left patch seed based on: + # # 1. the current patch index + # # 2. the patch dimension indexed by `dim_idx` + # # 3. 
and the vectorized patch dimensions that we have seen so far + # # the `vectorized_point_offset` is the offset from the top-left vectorized seed for this dimension + # vectorized_point_offset = (patch_idx // (vectorized_patch_offset)) % patch_dims[dim_idx] + + # # then we compute the actual point in the original data shape + # self.unraveled_patch_point[dim_idx] = self.unraveled_patch_point[dim_idx] + vectorized_point_offset + # vectorized_patch_offset *= patch_dims[dim_idx] + + # # if any dimensions are discontiguous, we want to migrate the entire axis a fixed amount + # # based on the shuffling + # if self._discontiguous is True: + # for dim_idx in range(self.ndim): + # if self.dim_contiguous[dim_idx] is True: + # continue + + # # determine the "row" we are currently on + # # other_dims_offset = 1 + # # for idx in range(dim_idx + 1, self.ndim): + # # other_dims_offset *= self.data_dims[idx] + # # row_index = self.unraveled_patch_point[dim_idx] % other_dims_offset + # # determine the "row" we are currently on + # other_dims_offset = 1 + # for idx in range(dim_idx + 1, self.ndim): + # if not self.dim_contiguous[idx]: + # other_dims_offset *= self.data_dims[idx] + + # row_index = 0 + # for idx in range(dim_idx + 1, self.ndim): + # if not self.dim_contiguous[idx]: + # row_index += ( + # (self.unraveled_patch_point[idx] // other_dims_offset) % + # self.patch_sampled_size[idx] + # ) * other_dims_offset + # other_dims_offset //= self.data_dims[idx] + + # # assign random row index now + # self.unraveled_patch_point[dim_idx] = self._index_patch_buffer[row_index] + + # # ravel the patch point into the original data dimensions + # vectorized_point = ravel_multi_index_cython(self.unraveled_patch_point, self.data_dims) + # proj_mat_indices[proj_i].push_back(vectorized_point) + # proj_mat_weights[proj_i].push_back(weight) + cdef void compute_features_over_samples( self, intp_t start, @@ -408,18 +772,62 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): const intp_t[:] samples, float32_t[:] feature_values, vector[float32_t]* proj_vec_weights, # weights of the vector (max_features,) - vector[intp_t]* proj_vec_indices # indices of the features (max_features,) + vector[intp_t]* proj_vec_indices # indices of the features (max_features,) ) noexcept nogil: """Compute the feature values for the samples[start:end] range. Returns -1 in case of failure to allocate memory (and raise MemoryError) or 0 otherwise. + + Parameters + ---------- + start : int + The start index of the samples. + end : int + The end index of the samples. + samples : array-like, shape (n_samples,) + The indices of the samples. + feature_values : array-like, shape (n_samples,) + The pre-allocated buffer to store the feature values. + proj_vec_weights : array-like, shape (max_features,) + The weights of the projection vector. + proj_vec_indices : array-like, shape (max_features,) + The indices of the projection vector. This will only store the vectorized + index of the top-left-seed. 
""" cdef intp_t idx, jdx # initialize feature weight to normalize across patch cdef float32_t patch_weight + if proj_vec_indices.size() != 1: + with gil: + raise ValueError("proj_vec_indices should only have one element corresponding to the top left seed.") + + # patch dimensions + cdef intp_t[:] patch_dims = self.patch_sampled_size + cdef vector[intp_t] top_left_seed = vector[intp_t](self.ndim) + + cdef intp_t volume_of_patch = 1 + # Calculate the total number of Cartesian products + for i in range(self.ndim): + volume_of_patch *= len(self.patch_nd_indices[i]) + + # create a buffer to store the raveled indices of the patch, which will be used + # to compute the relevant feature values + cdef vector[intp_t] raveled_patch_indices = vector[intp_t](volume_of_patch) + for jdx in range(0, proj_vec_indices.size()): + # get the index of the top-left patch + top_left_patch_index = deref(proj_vec_indices)[jdx] + + # compute the raveled index of all the points in the patch + compute_vectorized_indices_from_cartesian( + top_left_patch_index, + self.patch_nd_indices, + self.data_dims, + raveled_patch_indices + ) + # Compute linear combination of features and then # sort samples according to the feature values. for idx in range(start, end): @@ -427,15 +835,17 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter): # initialize the feature value to 0 feature_values[idx] = 0 - for jdx in range(0, proj_vec_indices.size()): + for kdx in range(raveled_patch_indices.size()): + feature_index = raveled_patch_indices[kdx] + feature_values[idx] += self.X[ - samples[idx], deref(proj_vec_indices)[jdx] + samples[idx], feature_index ] * deref(proj_vec_weights)[jdx] - if self.feature_weight is not None: - # gets the feature weight for this specific column from X - # the default of feature_weights[i] is (1/n_features) for all i - patch_weight += self.feature_weight[samples[idx], deref(proj_vec_indices)[jdx]] + if self.feature_weight is not None: + # gets the feature weight for this specific column from X + # the default of feature_weights[i] is (1/n_features) for all i + patch_weight += self.feature_weight[samples[idx], deref(proj_vec_indices)[jdx]] if self.feature_weight is not None: feature_values[idx] /= patch_weight @@ -464,7 +874,6 @@ cdef class BestPatchSplitterTester(BestPatchSplitter): proj_mat_weights, proj_mat_indices, proj_i, - patch_size, top_left_patch_seed, patch_dims ) diff --git a/treeple/tree/manifold/meson.build b/treeple/tree/manifold/meson.build index 867da8eaa..6b900aec6 100644 --- a/treeple/tree/manifold/meson.build +++ b/treeple/tree/manifold/meson.build @@ -2,6 +2,9 @@ tree_extension_metadata = { '_morf_splitter': {'sources': ['_morf_splitter.pyx'], 'override_options': ['cython_language=cpp', 'optimization=3']}, + '_kernel_splitter': + {'sources': ['_kernel_splitter.pyx'], + 'override_options': ['cython_language=cpp', 'optimization=3']}, } foreach ext_name, ext_dict : tree_extension_metadata diff --git a/treeple/tree/meson.build b/treeple/tree/meson.build index 2febbbc6a..c77266a4c 100644 --- a/treeple/tree/meson.build +++ b/treeple/tree/meson.build @@ -36,6 +36,7 @@ python_sources = [ '_neighbors.py', '_honest_tree.py', '_marginalize.py', + '_kernel.py', ] py.install_sources( diff --git a/treeple/tree/tests/meson.build b/treeple/tree/tests/meson.build index 552eb7356..cbdd38f71 100644 --- a/treeple/tree/tests/meson.build +++ b/treeple/tree/tests/meson.build @@ -7,6 +7,8 @@ python_sources = [ 'test_all_trees.py', 'test_unsupervised_tree.py', 'test_multiview.py', + 'test_kernel.py', + 
'test_manifold.py', ] py.install_sources( diff --git a/treeple/tree/tests/test_kernel.py b/treeple/tree/tests/test_kernel.py new file mode 100644 index 000000000..6e18b276c --- /dev/null +++ b/treeple/tree/tests/test_kernel.py @@ -0,0 +1,52 @@ +import numpy as np +import pytest + +from treeple.tree._kernel import sample_gaussian_kernels + + +def gaussian_2d(x, y, mu, sigma): + """Generate the 2D Gaussian value at point (x, y). + + Used to generate the expected Gaussian kernel for testing. + """ + return np.exp(-((x - mu) ** 2 + (y - mu) ** 2) / (2 * sigma**2)) + + +@pytest.mark.parametrize( + "n_kernels, min_size, max_size, mu, sigma", + [ + (1, 3, 3, 0.0, 1.0), # Single fixed kernel + (2, 3, 5, 0.5, 1.1), # Two kernels with varying size and fixed sigma + (3, 3, 5, (0.0, 0.5), (0.1, 1.0)), # Varying sizes, mu, and sigma + ], +) +def test_sample_gaussian_kernels(n_kernels, min_size, max_size, mu, sigma): + kernel_matrix, kernel_params = sample_gaussian_kernels(n_kernels, min_size, max_size, mu, sigma) + + # Ensure kernel_matrix and kernel_params have correct lengths + assert kernel_matrix.shape[0] == n_kernels + assert kernel_params["size"].shape[0] == n_kernels + + for i in range(n_kernels): + size = kernel_params["size"][i] + mu_sample = kernel_params["mu"][i] + sigma_sample = kernel_params["sigma"][i] + + # Extract and reshape the kernel + start_idx = kernel_matrix.indptr[i] + end_idx = kernel_matrix.indptr[i + 1] + kernel_vector = kernel_matrix.data[start_idx:end_idx] + kernel = kernel_vector.reshape(size, size) + + # Generate the expected Gaussian kernel + x = np.linspace(-1, 1, size) + y = np.linspace(-1, 1, size) + X, Y = np.meshgrid(x, y) + expected_kernel = gaussian_2d(X, Y, mu_sample, sigma_sample) + + # Normalize the expected kernel for comparison + expected_kernel /= expected_kernel.sum() + kernel /= kernel.sum() + + # Check that the kernel matches the expected Gaussian distribution + np.testing.assert_allclose(kernel, expected_kernel, rtol=1e-2, atol=1e-2) diff --git a/treeple/tree/tests/test_manifold.py b/treeple/tree/tests/test_manifold.py new file mode 100644 index 000000000..bdd53b821 --- /dev/null +++ b/treeple/tree/tests/test_manifold.py @@ -0,0 +1,83 @@ +import numpy as np +from numpy.testing import assert_array_equal +from sklearn import datasets + +from treeple.tree import ( + ExtraObliqueDecisionTreeClassifier, + ExtraObliqueDecisionTreeRegressor, + ObliqueDecisionTreeClassifier, + ObliqueDecisionTreeRegressor, + PatchObliqueDecisionTreeClassifier, + PatchObliqueDecisionTreeRegressor, + UnsupervisedDecisionTree, + UnsupervisedObliqueDecisionTree, +) + +ALL_TREES = [ + ExtraObliqueDecisionTreeRegressor, + ObliqueDecisionTreeRegressor, + PatchObliqueDecisionTreeRegressor, + ExtraObliqueDecisionTreeClassifier, + ObliqueDecisionTreeClassifier, + PatchObliqueDecisionTreeClassifier, + UnsupervisedDecisionTree, + UnsupervisedObliqueDecisionTree, +] + +rng = np.random.RandomState(1) + +# load digits dataset and randomly permute it +digits = datasets.load_digits() +perm = rng.permutation(digits.target.size) +digits.data = digits.data[perm] +digits.target = digits.target[perm] + + +def test_splitters(): + """Test that the patch splitter initializes and reshapes X correctly.""" + X, y = digits.data, digits.target + X = X.astype(np.float32) + sample_weight = np.ones(len(y), dtype=np.float64).squeeze() + y = y.reshape(-1, 1).astype(np.float64) + # missing_values_in_feature_mask = None + + print(X.shape, y.shape) + from treeple._lib.sklearn.tree._criterion import Gini + from treeple.tree.manifold._morf_splitter import 
BestPatchSplitterTester + + criterion = Gini(1, np.array((0, 1), dtype=np.intp)) + max_features = 6 + min_samples_leaf = 1 + min_weight_leaf = 0.0 + monotonic_cst = np.array([1, 1, 1, 1, 1, 1], dtype=np.int8) + random_state = np.random.RandomState(100) + boundary = None + feature_weight = None + min_patch_dims = np.array((1, 1), dtype=np.intp) + max_patch_dims = np.array((3, 1), dtype=np.intp) + dim_contiguous = np.array((True, True)) + data_dims = np.array((8, 8), dtype=np.intp) + + feature_combinations = 1.5 + + splitter = BestPatchSplitterTester( + criterion, + max_features, + min_samples_leaf, + min_weight_leaf, + random_state, + monotonic_cst, + feature_combinations, + min_patch_dims, + max_patch_dims, + dim_contiguous, + data_dims, + boundary, + feature_weight, + ) + splitter.init_test( + X, + y, + sample_weight, + ) + assert_array_equal(splitter.X_reshaped.reshape(-1, 64), X)
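For readers tracing the splitter changes above, the patch-indexing idea is easier to see in plain NumPy than in the Cython diff: sample a patch size and a top-left seed, enumerate the per-dimension indices the patch covers, take their Cartesian product, and ravel each point into a column index of the flattened feature matrix. The sketch below is illustrative only and is not part of the patch; every name in it is made up for the example, and it assumes contiguous dimensions with boundary=None.

import itertools

import numpy as np

rng = np.random.RandomState(0)
data_dims = (8, 8)   # an 8x8 image flattened to 64 features, as in test_splitters above
patch_dims = (3, 2)  # a sampled patch height/width, within the min/max patch dims

# sample the top-left corner so the patch stays inside the image (boundary=None)
top_left = [rng.randint(0, d - p + 1) for d, p in zip(data_dims, patch_dims)]

# per-dimension indices covered by the patch (contiguous dimensions only;
# a discontiguous dimension would instead draw a shuffled subset of indices)
axis_indices = [range(t, t + p) for t, p in zip(top_left, patch_dims)]

# Cartesian product of the per-dimension indices, raveled into vectorized
# column indices; a weight-1 projection vector touches exactly these columns
raveled = [np.ravel_multi_index(point, data_dims) for point in itertools.product(*axis_indices)]
assert len(raveled) == patch_dims[0] * patch_dims[1]  # the patch volume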