Also convert double to float64_t
Signed-off-by: Adam Li <adam2392@gmail.com>
adam2392 committed Oct 12, 2023
1 parent 82927e6 commit 06cac3b
Showing 19 changed files with 166 additions and 166 deletions.
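Background on the rename: in scikit-learn's Cython typedefs (`sklearn/utils/_typedefs`, vendored here under `sktree/_lib`), `float64_t` is an alias of C `double`, so converting `double` to `float64_t` standardizes naming without changing numeric precision; the bump to `scikit-learn>=1.3.1` presumably picks up a release that ships these typedefs as used here. A quick Python check of the size assumption:

```python
import ctypes
import numpy as np

# Assumption relied on throughout this diff: `float64_t` aliases C `double`,
# so both are 8-byte IEEE 754 floats and every `sizeof(double)` ->
# `sizeof(float64_t)` swap is size-for-size.
assert np.dtype(np.float64).itemsize == ctypes.sizeof(ctypes.c_double) == 8
```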
2 changes: 1 addition & 1 deletion build_requirements.txt
@@ -3,7 +3,7 @@ meson-python
cython==0.29.36
ninja
numpy
-scikit-learn>=1.3
+scikit-learn>=1.3.1
click
rich-click
doit
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -38,7 +38,7 @@ include = [
dependencies = [
'numpy',
'scipy>=1.5.0',
-'scikit-learn>=1.3'
+'scikit-learn>=1.3.1'
]


2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,3 +1,3 @@
numpy>=1.25
scipy
-scikit-learn>=1.3
+scikit-learn>=1.3.1
4 changes: 2 additions & 2 deletions sktree/tree/_marginal.pyx
@@ -140,7 +140,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
cdef float32_t X_i_node_feature

cdef float32_t n_node_samples, n_right_samples, n_left_samples
-cdef double p_left
+cdef float64_t p_left
cdef intp_t is_left

# Initialize output
@@ -172,7 +172,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
n_right_samples = tree.nodes[node.right_child].n_node_samples

# compute the probabilities for going left and right
-p_left = (<double>n_left_samples / n_node_samples)
+p_left = (<float64_t>n_left_samples / n_node_samples)

# randomly sample a direction
is_left = rand_weighted_binary(p_left, rand_r_state)
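The `_marginal.pyx` hunk above types the probabilistic descent used when a feature is marginalized out: the traversal goes left with probability equal to the left child's share of the node's samples. A rough Python equivalent of that step (function name and signature are illustrative, not the module's API):

```python
import numpy as np

def descend_marginal(n_left_samples: float, n_node_samples: float,
                     rng: np.random.Generator) -> bool:
    """Return True to follow the left child.

    Mirrors `p_left = (<float64_t>n_left_samples / n_node_samples)`
    followed by `rand_weighted_binary(p_left, rand_r_state)`.
    """
    p_left = np.float64(n_left_samples) / np.float64(n_node_samples)
    return bool(rng.random() < p_left)  # weighted binary draw

rng = np.random.default_rng(0)
print(descend_marginal(30, 100, rng))  # True roughly 30% of the time
```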
32 changes: 16 additions & 16 deletions sktree/tree/_oblique_splitter.pxd
@@ -25,10 +25,10 @@ cdef struct ObliqueSplitRecord:
intp_t pos # Split samples array at the given position,
# # i.e. count of samples below threshold for feature.
# # pos is >= end if the node is a leaf.
-double threshold # Threshold to split at.
-double improvement # Impurity improvement given parent node.
-double impurity_left # Impurity of the left split.
-double impurity_right # Impurity of the right split.
+float64_t threshold # Threshold to split at.
+float64_t improvement # Impurity improvement given parent node.
+float64_t impurity_left # Impurity of the left split.
+float64_t impurity_right # Impurity of the right split.

vector[float32_t]* proj_vec_weights # weights of the vector (max_features,)
vector[intp_t]* proj_vec_indices # indices of the features (max_features,)
@@ -62,7 +62,7 @@ cdef class BaseObliqueSplitter(Splitter):
self,
intp_t start,
intp_t end,
-double* weighted_n_node_samples
+float64_t* weighted_n_node_samples
) except -1 nogil

cdef void compute_features_over_samples(
@@ -77,11 +77,11 @@ cdef class BaseObliqueSplitter(Splitter):

cdef intp_t node_split(
self,
-double impurity, # Impurity of the node
+float64_t impurity, # Impurity of the node
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil

cdef inline void fisher_yates_shuffle_memview(
@@ -96,7 +96,7 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
# to split the samples samples[start:end].

# Oblique Splitting extra parameters
-cdef public double feature_combinations # Number of features to combine
+cdef public float64_t feature_combinations # Number of features to combine
cdef intp_t n_non_zeros # Number of non-zero features
cdef intp_t[::1] indices_to_sample # an array of indices to sample of size mtry X n_features

@@ -113,11 +113,11 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
cdef class BestObliqueSplitter(ObliqueSplitter):
cdef intp_t node_split(
self,
-double impurity, # Impurity of the node
+float64_t impurity, # Impurity of the node
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil


@@ -131,16 +131,16 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):

cdef intp_t partition_samples(
self,
-double current_threshold
+float64_t current_threshold
) noexcept nogil

cdef intp_t node_split(
self,
-double impurity, # Impurity of the node
+float64_t impurity, # Impurity of the node
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil


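A note on `ObliqueSplitRecord`, whose fields this hunk retypes: unlike an axis-aligned split, `threshold` is compared against a sparse linear projection of the sample defined by `proj_vec_indices` and `proj_vec_weights`. A hedged Python sketch of that decision rule (the helper is illustrative; field names mirror the struct):

```python
import numpy as np

def goes_left(x, proj_vec_indices, proj_vec_weights, threshold):
    # Projected feature value: sparse dot product over selected features.
    projection = float(np.dot(x[proj_vec_indices], proj_vec_weights))
    return projection <= threshold

x = np.array([0.5, -1.0, 2.0, 0.0])
# Project onto features 0 and 2 with weights +1 and -1.
print(goes_left(x, np.array([0, 2]), np.array([1.0, -1.0]), threshold=0.0))
```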
46 changes: 23 additions & 23 deletions sktree/tree/_oblique_splitter.pyx
@@ -13,7 +13,7 @@ from sklearn.tree._utils cimport rand_int, rand_uniform
from .._lib.sklearn.tree._criterion cimport Criterion


-cdef double INFINITY = np.inf
+cdef float64_t INFINITY = np.inf

# Mitigate precision differences between 32 bit and 64 bit
cdef float32_t FEATURE_THRESHOLD = 1e-7
@@ -45,7 +45,7 @@ cdef class BaseObliqueSplitter(Splitter):
pass

cdef intp_t node_reset(self, intp_t start, intp_t end,
-double* weighted_n_node_samples) except -1 nogil:
+float64_t* weighted_n_node_samples) except -1 nogil:
"""Reset splitter on node samples[start:end].
Returns -1 in case of failure to allocate memory (and raise MemoryError)
@@ -57,7 +57,7 @@ cdef class BaseObliqueSplitter(Splitter):
The index of the first sample to consider
end : intp_t
The index of the last sample to consider
-weighted_n_node_samples : ndarray, dtype=double pointer
+weighted_n_node_samples : ndarray, dtype=float64_t pointer
The total weight of those samples
"""

Expand Down Expand Up @@ -144,10 +144,10 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
Criterion criterion,
intp_t max_features,
intp_t min_samples_leaf,
-double min_weight_leaf,
+float64_t min_weight_leaf,
object random_state,
const cnp.int8_t[:] monotonic_cst,
-double feature_combinations,
+float64_t feature_combinations,
*argv
):
"""
@@ -165,11 +165,11 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
which would result in having less samples in a leaf are not
considered.
-min_weight_leaf : double
+min_weight_leaf : float64_t
The minimal weight each leaf can have, where the weight is the sum
of the weights of each sample in it.
-feature_combinations : double
+feature_combinations : float64_t
The average number of features to combine in an oblique split.
Each feature is independently included with probability
``feature_combinations`` / ``n_features``.
@@ -290,11 +290,11 @@ cdef class BestObliqueSplitter(ObliqueSplitter):

cdef intp_t node_split(
self,
-double impurity,
+float64_t impurity,
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil:
"""Find the best_split split on node samples[start:end]
@@ -317,8 +317,8 @@ cdef class BestObliqueSplitter(ObliqueSplitter):
# keep track of the split record for the current node and the best split
# found among the sampled projection vectors
cdef ObliqueSplitRecord best_split, current_split
-cdef double current_proxy_improvement = -INFINITY
-cdef double best_proxy_improvement = -INFINITY
+cdef float64_t current_proxy_improvement = -INFINITY
+cdef float64_t best_proxy_improvement = -INFINITY

cdef intp_t feat_i, p # index over computed features and start/end
cdef intp_t partition_end
@@ -472,7 +472,7 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):
min_feature_value_out[0] = min_feature_value
max_feature_value_out[0] = max_feature_value

-cdef inline intp_t partition_samples(self, double current_threshold) noexcept nogil:
+cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil:
"""Partition samples for feature_values at the current_threshold."""
cdef:
intp_t p = self.start
@@ -496,11 +496,11 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):
# overwrite the node_split method with random threshold selection
cdef intp_t node_split(
self,
-double impurity,
+float64_t impurity,
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil:
"""Find the best_split split on node samples[start:end]
@@ -520,13 +520,13 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):
cdef float32_t[::1] feature_values = self.feature_values
cdef intp_t max_features = self.max_features
cdef intp_t min_samples_leaf = self.min_samples_leaf
-cdef double min_weight_leaf = self.min_weight_leaf
+cdef float64_t min_weight_leaf = self.min_weight_leaf

# keep track of the split record for the current node and the best split
# found among the sampled projection vectors
cdef ObliqueSplitRecord best_split, current_split
-cdef double current_proxy_improvement = -INFINITY
-cdef double best_proxy_improvement = -INFINITY
+cdef float64_t current_proxy_improvement = -INFINITY
+cdef float64_t best_proxy_improvement = -INFINITY

cdef intp_t p
cdef intp_t feat_i
@@ -665,10 +665,10 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
Criterion criterion,
intp_t max_features,
intp_t min_samples_leaf,
-double min_weight_leaf,
+float64_t min_weight_leaf,
object random_state,
const cnp.int8_t[:] monotonic_cst,
-double feature_combinations,
+float64_t feature_combinations,
const intp_t[:] feature_set_ends,
intp_t n_feature_sets,
*argv
@@ -789,10 +789,10 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter):
Criterion criterion,
intp_t max_features,
intp_t min_samples_leaf,
-double min_weight_leaf,
+float64_t min_weight_leaf,
object random_state,
const cnp.int8_t[:] monotonic_cst,
-double feature_combinations,
+float64_t feature_combinations,
const intp_t[:] feature_set_ends,
intp_t n_feature_sets,
bint uniform_sampling,
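The `feature_combinations` docstring in this file says each feature is independently included in a projection with probability `feature_combinations / n_features`; in sparse oblique forests the included features typically receive random ±1 weights. A simplified sketch of that sampling scheme (an approximation of the splitter's behavior, not its exact code path):

```python
import numpy as np

def sample_projection(n_features, feature_combinations, rng):
    """Draw one sparse oblique projection vector."""
    proj = np.zeros(n_features, dtype=np.float64)
    include = rng.random(n_features) < (feature_combinations / n_features)
    # Included features get a random sign, as in SPORF-style splitters.
    proj[include] = rng.choice([-1.0, 1.0], size=int(include.sum()))
    return proj

rng = np.random.default_rng(42)
print(sample_projection(n_features=8, feature_combinations=1.5, rng=rng))
```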
10 changes: 5 additions & 5 deletions sktree/tree/_oblique_tree.pyx
@@ -72,13 +72,13 @@ cdef class ObliqueTree(Tree):
feature : array of intp_t, shape [node_count]
feature[i] holds the feature to split on, for the internal node i.
-threshold : array of double, shape [node_count]
+threshold : array of float64_t, shape [node_count]
threshold[i] holds the threshold for the internal node i.
-value : array of double, shape [node_count, n_outputs, max_n_classes]
+value : array of float64_t, shape [node_count, n_outputs, max_n_classes]
Contains the constant prediction value of each node.
-impurity : array of double, shape [node_count]
+impurity : array of float64_t, shape [node_count]
impurity[i] holds the impurity (i.e., the value of the splitting
criterion) at node i.
@@ -179,7 +179,7 @@ cdef class ObliqueTree(Tree):
memcpy(self.nodes, cnp.PyArray_DATA(node_ndarray),
self.capacity * sizeof(Node))
memcpy(self.value, cnp.PyArray_DATA(value_ndarray),
-self.capacity * self.value_stride * sizeof(double))
+self.capacity * self.value_stride * sizeof(float64_t))

cpdef cnp.ndarray get_projection_matrix(self):
"""Get the projection matrix of shape (node_count, n_features)."""
@@ -220,7 +220,7 @@ cdef class ObliqueTree(Tree):
# value memory is initialised to 0 to enable classifier argmax
if capacity > self.capacity:
memset(<void*>(self.value + self.capacity * self.value_stride), 0,
-(capacity - self.capacity) * self.value_stride * sizeof(double))
+(capacity - self.capacity) * self.value_stride * sizeof(float64_t))

# if capacity smaller than node_count, adjust the counter
if capacity < self.node_count:
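The two hunks above only rename the element type in the `memcpy`/`memset` size arithmetic; since `float64_t` and `double` are the same 8 bytes, the copied and zeroed byte counts are unchanged. For intuition, a hypothetical NumPy mirror of the resize-and-zero step (names are illustrative, not the class's API):

```python
import numpy as np

def resize_value_buffer(value, old_capacity, new_capacity, value_stride):
    """Grow the flat per-node `value` buffer, zeroing only the new tail."""
    new_value = np.empty(new_capacity * value_stride, dtype=np.float64)
    old_n = old_capacity * value_stride
    new_value[:old_n] = value[:old_n]   # memcpy of the existing nodes
    new_value[old_n:] = 0.0             # memset enabling classifier argmax
    return new_value
```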
4 changes: 2 additions & 2 deletions sktree/tree/_utils.pxd
@@ -4,8 +4,8 @@ cimport numpy as cnp

cnp.import_array()

-from sktree._lib.sklearn.tree._splitter cimport SplitRecord
-from sktree._lib.sklearn.utils._typedefs cimport float32_t, float64_t, int32_t, intp_t, uint32_t
+from .._lib.sklearn.tree._splitter cimport SplitRecord
+from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, int32_t, intp_t, uint32_t


cdef int rand_weighted_binary(float64_t p0, uint32_t* random_state) noexcept nogil
2 changes: 1 addition & 1 deletion sktree/tree/_utils.pyx
@@ -11,7 +11,7 @@ cimport numpy as cnp

cnp.import_array()

-from sktree._lib.sklearn.tree._utils cimport rand_uniform
+from .._lib.sklearn.tree._utils cimport rand_uniform


cdef inline int rand_weighted_binary(float64_t p0, uint32_t* random_state) noexcept nogil:
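`rand_weighted_binary`, declared above with a `float64_t` probability, is the weighted coin flip the marginal traversal relies on. A plausible pure-Python equivalent, assuming it compares one uniform variate against `p0`:

```python
import random

def rand_weighted_binary(p0, rng):
    """Return 1 with probability p0, else 0 (assumed semantics)."""
    return 1 if rng.random() < p0 else 0

rng = random.Random(1234)
draws = [rand_weighted_binary(0.25, rng) for _ in range(10_000)]
print(sum(draws) / len(draws))  # close to 0.25
```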
8 changes: 4 additions & 4 deletions sktree/tree/manifold/_morf_splitter.pyx
@@ -45,7 +45,7 @@ cdef class PatchSplitter(BestObliqueSplitter):
self,
intp_t start,
intp_t end,
-double* weighted_n_node_samples
+float64_t* weighted_n_node_samples
) except -1 nogil:
"""Reset splitter on node samples[start:end].
@@ -58,7 +58,7 @@ cdef class PatchSplitter(BestObliqueSplitter):
The index of the first sample to consider
end : intp_t
The index of the last sample to consider
-weighted_n_node_samples : ndarray, dtype=double pointer
+weighted_n_node_samples : ndarray, dtype=float64_t pointer
The total weight of those samples
"""

@@ -122,10 +122,10 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter):
Criterion criterion,
intp_t max_features,
intp_t min_samples_leaf,
-double min_weight_leaf,
+float64_t min_weight_leaf,
object random_state,
const cnp.int8_t[:] monotonic_cst,
-double feature_combinations,
+float64_t feature_combinations,
const intp_t[:] min_patch_dims,
const intp_t[:] max_patch_dims,
const cnp.uint8_t[:] dim_contiguous,
14 changes: 7 additions & 7 deletions sktree/tree/unsupervised/_unsup_criterion.pxd
@@ -33,13 +33,13 @@ cdef class UnsupervisedCriterion(BaseCriterion):
# the left and right node. For example, this can then efficiently compute the
# mean of the node, and left/right child by subtracting relevant Xf elements
# and then dividing by the total number of samples in the node and left/right child.
-cdef double sum_total # The sum of the weighted count of each feature.
-cdef double sum_left # Same as above, but for the left side of the split
-cdef double sum_right # Same as above, but for the right side of the split
+cdef float64_t sum_total # The sum of the weighted count of each feature.
+cdef float64_t sum_left # Same as above, but for the left side of the split
+cdef float64_t sum_right # Same as above, but for the right side of the split

-cdef double sumsq_total # The sum of the squared weighted count of each feature.
-cdef double sumsq_left # Same as above, but for the left side of the split
-cdef double sumsq_right # Same as above, but for the right side of the split
+cdef float64_t sumsq_total # The sum of the squared weighted count of each feature.
+cdef float64_t sumsq_left # Same as above, but for the left side of the split
+cdef float64_t sumsq_right # Same as above, but for the right side of the split

# Methods
# -------
@@ -50,7 +50,7 @@ cdef class UnsupervisedCriterion(BaseCriterion):
self,
const float32_t[:] feature_values,
const float64_t[:] sample_weight,
-double weighted_n_samples,
+float64_t weighted_n_samples,
const intp_t[:] samples,
) except -1 nogil

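The comments in this hunk describe the criterion's bookkeeping: keeping running `sum_*` and `sumsq_*` accumulators lets node and child means/variances be updated by subtraction as the split position moves, with no rescan of the feature values. A small unweighted Python illustration of that identity:

```python
import numpy as np

x = np.array([1.0, 4.0, 2.0, 8.0, 5.0])     # feature values in one node
sum_total, sumsq_total = x.sum(), (x * x).sum()

pos = 2                                      # left = x[:2], right = x[2:]
sum_left, sumsq_left = x[:pos].sum(), (x[:pos] ** 2).sum()
sum_right = sum_total - sum_left             # right side by subtraction
sumsq_right = sumsq_total - sumsq_left

n_right = len(x) - pos
# Variance from running sums: E[x^2] - E[x]^2, no second pass needed.
var_right = sumsq_right / n_right - (sum_right / n_right) ** 2
assert np.isclose(var_right, x[pos:].var())
```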