Also convert double to float64_t
Signed-off-by: Adam Li <adam2392@gmail.com>
adam2392 committed Oct 12, 2023
1 parent 82927e6 commit 06cac3b
Showing 19 changed files with 166 additions and 166 deletions.
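Background on the rename: in scikit-learn's Cython typedefs (`sklearn/utils/_typedefs`, vendored here under `sktree/_lib`), `float64_t` is an alias of C `double`, so converting `double` to `float64_t` standardizes naming without changing numeric precision; the bump to `scikit-learn>=1.3.1` presumably picks up a release that ships these typedefs as used here. A quick Python check of the size assumption:

```python
import ctypes
import numpy as np

# Assumption relied on throughout this diff: `float64_t` aliases C `double`,
# so both are 8-byte IEEE 754 floats and every `sizeof(double)` ->
# `sizeof(float64_t)` swap is size-for-size.
assert np.dtype(np.float64).itemsize == ctypes.sizeof(ctypes.c_double) == 8
```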
2 changes: 1 addition & 1 deletion build_requirements.txt
@@ -3,7 +3,7 @@ meson-python
cython==0.29.36
ninja
numpy
-scikit-learn>=1.3
+scikit-learn>=1.3.1
click
rich-click
doit
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -38,7 +38,7 @@ include = [
dependencies = [
'numpy',
'scipy>=1.5.0',
-'scikit-learn>=1.3'
+'scikit-learn>=1.3.1'
]


2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,3 +1,3 @@
numpy>=1.25
scipy
-scikit-learn>=1.3
+scikit-learn>=1.3.1
4 changes: 2 additions & 2 deletions sktree/tree/_marginal.pyx
@@ -140,7 +140,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
cdef float32_t X_i_node_feature

cdef float32_t n_node_samples, n_right_samples, n_left_samples
-cdef double p_left
+cdef float64_t p_left
cdef intp_t is_left

# Initialize output
@@ -172,7 +172,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
n_right_samples = tree.nodes[node.right_child].n_node_samples

# compute the probabilities for going left and right
-p_left = (<double>n_left_samples / n_node_samples)
+p_left = (<float64_t>n_left_samples / n_node_samples)

# randomly sample a direction
is_left = rand_weighted_binary(p_left, rand_r_state)
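The `_marginal.pyx` hunk above types the probabilistic descent used when a feature is marginalized out: the traversal goes left with probability equal to the left child's share of the node's samples. A rough Python equivalent of that step (function name and signature are illustrative, not the module's API):

```python
import numpy as np

def descend_marginal(n_left_samples: float, n_node_samples: float,
                     rng: np.random.Generator) -> bool:
    """Return True to follow the left child.

    Mirrors `p_left = (<float64_t>n_left_samples / n_node_samples)`
    followed by `rand_weighted_binary(p_left, rand_r_state)`.
    """
    p_left = np.float64(n_left_samples) / np.float64(n_node_samples)
    return bool(rng.random() < p_left)  # weighted binary draw

rng = np.random.default_rng(0)
print(descend_marginal(30, 100, rng))  # True roughly 30% of the time
```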
32 changes: 16 additions & 16 deletions sktree/tree/_oblique_splitter.pxd
@@ -25,10 +25,10 @@ cdef struct ObliqueSplitRecord:
intp_t pos # Split samples array at the given position,
# # i.e. count of samples below threshold for feature.
# # pos is >= end if the node is a leaf.
-double threshold # Threshold to split at.
-double improvement # Impurity improvement given parent node.
-double impurity_left # Impurity of the left split.
-double impurity_right # Impurity of the right split.
+float64_t threshold # Threshold to split at.
+float64_t improvement # Impurity improvement given parent node.
+float64_t impurity_left # Impurity of the left split.
+float64_t impurity_right # Impurity of the right split.

vector[float32_t]* proj_vec_weights # weights of the vector (max_features,)
vector[intp_t]* proj_vec_indices # indices of the features (max_features,)
@@ -62,7 +62,7 @@ cdef class BaseObliqueSplitter(Splitter):
self,
intp_t start,
intp_t end,
-double* weighted_n_node_samples
+float64_t* weighted_n_node_samples
) except -1 nogil

cdef void compute_features_over_samples(
@@ -77,11 +77,11 @@ cdef class BaseObliqueSplitter(Splitter):

cdef intp_t node_split(
self,
-double impurity, # Impurity of the node
+float64_t impurity, # Impurity of the node
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil

cdef inline void fisher_yates_shuffle_memview(
@@ -96,7 +96,7 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
# to split the samples samples[start:end].

# Oblique Splitting extra parameters
-cdef public double feature_combinations # Number of features to combine
+cdef public float64_t feature_combinations # Number of features to combine
cdef intp_t n_non_zeros # Number of non-zero features
cdef intp_t[::1] indices_to_sample # an array of indices to sample of size mtry X n_features

@@ -113,11 +113,11 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
cdef class BestObliqueSplitter(ObliqueSplitter):
cdef intp_t node_split(
self,
-double impurity, # Impurity of the node
+float64_t impurity, # Impurity of the node
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil


@@ -131,16 +131,16 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):

cdef intp_t partition_samples(
self,
-double current_threshold
+float64_t current_threshold
) noexcept nogil

cdef intp_t node_split(
self,
-double impurity, # Impurity of the node
+float64_t impurity, # Impurity of the node
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil


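A note on `ObliqueSplitRecord`, whose fields this hunk retypes: unlike an axis-aligned split, `threshold` is compared against a sparse linear projection of the sample defined by `proj_vec_indices` and `proj_vec_weights`. A hedged Python sketch of that decision rule (the helper is illustrative; field names mirror the struct):

```python
import numpy as np

def goes_left(x, proj_vec_indices, proj_vec_weights, threshold):
    # Projected feature value: sparse dot product over selected features.
    projection = float(np.dot(x[proj_vec_indices], proj_vec_weights))
    return projection <= threshold

x = np.array([0.5, -1.0, 2.0, 0.0])
# Project onto features 0 and 2 with weights +1 and -1.
print(goes_left(x, np.array([0, 2]), np.array([1.0, -1.0]), threshold=0.0))
```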
46 changes: 23 additions & 23 deletions sktree/tree/_oblique_splitter.pyx
@@ -13,7 +13,7 @@ from sklearn.tree._utils cimport rand_int, rand_uniform
from .._lib.sklearn.tree._criterion cimport Criterion


-cdef double INFINITY = np.inf
+cdef float64_t INFINITY = np.inf

# Mitigate precision differences between 32 bit and 64 bit
cdef float32_t FEATURE_THRESHOLD = 1e-7
@@ -45,7 +45,7 @@ cdef class BaseObliqueSplitter(Splitter):
pass

cdef intp_t node_reset(self, intp_t start, intp_t end,
-double* weighted_n_node_samples) except -1 nogil:
+float64_t* weighted_n_node_samples) except -1 nogil:
"""Reset splitter on node samples[start:end].
Returns -1 in case of failure to allocate memory (and raise MemoryError)
@@ -57,7 +57,7 @@ cdef class BaseObliqueSplitter(Splitter):
The index of the first sample to consider
end : intp_t
The index of the last sample to consider
-weighted_n_node_samples : ndarray, dtype=double pointer
+weighted_n_node_samples : ndarray, dtype=float64_t pointer
The total weight of those samples
"""

Expand Down Expand Up @@ -144,10 +144,10 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
Criterion criterion,
intp_t max_features,
intp_t min_samples_leaf,
-double min_weight_leaf,
+float64_t min_weight_leaf,
object random_state,
const cnp.int8_t[:] monotonic_cst,
-double feature_combinations,
+float64_t feature_combinations,
*argv
):
"""
@@ -165,11 +165,11 @@ cdef class ObliqueSplitter(BaseObliqueSplitter):
which would result in having less samples in a leaf are not
considered.
-min_weight_leaf : double
+min_weight_leaf : float64_t
The minimal weight each leaf can have, where the weight is the sum
of the weights of each sample in it.
-feature_combinations : double
+feature_combinations : float64_t
The average number of features to combine in an oblique split.
Each feature is independently included with probability
``feature_combinations`` / ``n_features``.
@@ -290,11 +290,11 @@ cdef class BestObliqueSplitter(ObliqueSplitter):

cdef intp_t node_split(
self,
-double impurity,
+float64_t impurity,
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil:
"""Find the best_split split on node samples[start:end]
@@ -317,8 +317,8 @@ cdef class BestObliqueSplitter(ObliqueSplitter):
# keep track of the split record for the current node and the best split
# found among the sampled projection vectors
cdef ObliqueSplitRecord best_split, current_split
-cdef double current_proxy_improvement = -INFINITY
-cdef double best_proxy_improvement = -INFINITY
+cdef float64_t current_proxy_improvement = -INFINITY
+cdef float64_t best_proxy_improvement = -INFINITY

cdef intp_t feat_i, p # index over computed features and start/end
cdef intp_t partition_end
@@ -472,7 +472,7 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):
min_feature_value_out[0] = min_feature_value
max_feature_value_out[0] = max_feature_value

-cdef inline intp_t partition_samples(self, double current_threshold) noexcept nogil:
+cdef inline intp_t partition_samples(self, float64_t current_threshold) noexcept nogil:
"""Partition samples for feature_values at the current_threshold."""
cdef:
intp_t p = self.start
@@ -496,11 +496,11 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):
# overwrite the node_split method with random threshold selection
cdef intp_t node_split(
self,
-double impurity,
+float64_t impurity,
SplitRecord* split,
intp_t* n_constant_features,
-double lower_bound,
-double upper_bound,
+float64_t lower_bound,
+float64_t upper_bound,
) except -1 nogil:
"""Find the best_split split on node samples[start:end]
@@ -520,13 +520,13 @@ cdef class RandomObliqueSplitter(ObliqueSplitter):
cdef float32_t[::1] feature_values = self.feature_values
cdef intp_t max_features = self.max_features
cdef intp_t min_samples_leaf = self.min_samples_leaf
-cdef double min_weight_leaf = self.min_weight_leaf
+cdef float64_t min_weight_leaf = self.min_weight_leaf

# keep track of the split record for the current node and the best split
# found among the sampled projection vectors
cdef ObliqueSplitRecord best_split, current_split
-cdef double current_proxy_improvement = -INFINITY
-cdef double best_proxy_improvement = -INFINITY
+cdef float64_t current_proxy_improvement = -INFINITY
+cdef float64_t best_proxy_improvement = -INFINITY

cdef intp_t p
cdef intp_t feat_i
@@ -665,10 +665,10 @@ cdef class MultiViewSplitter(BestObliqueSplitter):
Criterion criterion,
intp_t max_features,
intp_t min_samples_leaf,
-double min_weight_leaf,
+float64_t min_weight_leaf,
object random_state,
const cnp.int8_t[:] monotonic_cst,
-double feature_combinations,
+float64_t feature_combinations,
const intp_t[:] feature_set_ends,
intp_t n_feature_sets,
*argv
@@ -789,10 +789,10 @@ cdef class MultiViewObliqueSplitter(BestObliqueSplitter):
Criterion criterion,
intp_t max_features,
intp_t min_samples_leaf,
-double min_weight_leaf,
+float64_t min_weight_leaf,
object random_state,
const cnp.int8_t[:] monotonic_cst,
-double feature_combinations,
+float64_t feature_combinations,
const intp_t[:] feature_set_ends,
intp_t n_feature_sets,
bint uniform_sampling,
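The `feature_combinations` docstring in this file says each feature is independently included in a projection with probability `feature_combinations / n_features`; in sparse oblique forests the included features typically receive random ±1 weights. A simplified sketch of that sampling scheme (an approximation of the splitter's behavior, not its exact code path):

```python
import numpy as np

def sample_projection(n_features, feature_combinations, rng):
    """Draw one sparse oblique projection vector."""
    proj = np.zeros(n_features, dtype=np.float64)
    include = rng.random(n_features) < (feature_combinations / n_features)
    # Included features get a random sign, as in SPORF-style splitters.
    proj[include] = rng.choice([-1.0, 1.0], size=int(include.sum()))
    return proj

rng = np.random.default_rng(42)
print(sample_projection(n_features=8, feature_combinations=1.5, rng=rng))
```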
10 changes: 5 additions & 5 deletions sktree/tree/_oblique_tree.pyx
@@ -72,13 +72,13 @@ cdef class ObliqueTree(Tree):
feature : array of intp_t, shape [node_count]
feature[i] holds the feature to split on, for the internal node i.
-threshold : array of double, shape [node_count]
+threshold : array of float64_t, shape [node_count]
threshold[i] holds the threshold for the internal node i.
-value : array of double, shape [node_count, n_outputs, max_n_classes]
+value : array of float64_t, shape [node_count, n_outputs, max_n_classes]
Contains the constant prediction value of each node.
-impurity : array of double, shape [node_count]
+impurity : array of float64_t, shape [node_count]
impurity[i] holds the impurity (i.e., the value of the splitting
criterion) at node i.
@@ -179,7 +179,7 @@ cdef class ObliqueTree(Tree):
memcpy(self.nodes, cnp.PyArray_DATA(node_ndarray),
self.capacity * sizeof(Node))
memcpy(self.value, cnp.PyArray_DATA(value_ndarray),
-self.capacity * self.value_stride * sizeof(double))
+self.capacity * self.value_stride * sizeof(float64_t))

cpdef cnp.ndarray get_projection_matrix(self):
"""Get the projection matrix of shape (node_count, n_features)."""
@@ -220,7 +220,7 @@ cdef class ObliqueTree(Tree):
# value memory is initialised to 0 to enable classifier argmax
if capacity > self.capacity:
memset(<void*>(self.value + self.capacity * self.value_stride), 0,
-(capacity - self.capacity) * self.value_stride * sizeof(double))
+(capacity - self.capacity) * self.value_stride * sizeof(float64_t))

# if capacity smaller than node_count, adjust the counter
if capacity < self.node_count:
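The two hunks above only rename the element type in the `memcpy`/`memset` size arithmetic; since `float64_t` and `double` are the same 8 bytes, the copied and zeroed byte counts are unchanged. For intuition, a hypothetical NumPy mirror of the resize-and-zero step (names are illustrative, not the class's API):

```python
import numpy as np

def resize_value_buffer(value, old_capacity, new_capacity, value_stride):
    """Grow the flat per-node `value` buffer, zeroing only the new tail."""
    new_value = np.empty(new_capacity * value_stride, dtype=np.float64)
    old_n = old_capacity * value_stride
    new_value[:old_n] = value[:old_n]   # memcpy of the existing nodes
    new_value[old_n:] = 0.0             # memset enabling classifier argmax
    return new_value
```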
4 changes: 2 additions & 2 deletions sktree/tree/_utils.pxd
@@ -4,8 +4,8 @@ cimport numpy as cnp

cnp.import_array()

-from sktree._lib.sklearn.tree._splitter cimport SplitRecord
-from sktree._lib.sklearn.utils._typedefs cimport float32_t, float64_t, int32_t, intp_t, uint32_t
+from .._lib.sklearn.tree._splitter cimport SplitRecord
+from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, int32_t, intp_t, uint32_t


cdef int rand_weighted_binary(float64_t p0, uint32_t* random_state) noexcept nogil
2 changes: 1 addition & 1 deletion sktree/tree/_utils.pyx
@@ -11,7 +11,7 @@ cimport numpy as cnp

cnp.import_array()

-from sktree._lib.sklearn.tree._utils cimport rand_uniform
+from .._lib.sklearn.tree._utils cimport rand_uniform


cdef inline int rand_weighted_binary(float64_t p0, uint32_t* random_state) noexcept nogil:
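`rand_weighted_binary`, declared above with a `float64_t` probability, is the weighted coin flip the marginal traversal relies on. A plausible pure-Python equivalent, assuming it compares one uniform variate against `p0`:

```python
import random

def rand_weighted_binary(p0, rng):
    """Return 1 with probability p0, else 0 (assumed semantics)."""
    return 1 if rng.random() < p0 else 0

rng = random.Random(1234)
draws = [rand_weighted_binary(0.25, rng) for _ in range(10_000)]
print(sum(draws) / len(draws))  # close to 0.25
```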
8 changes: 4 additions & 4 deletions sktree/tree/manifold/_morf_splitter.pyx
@@ -45,7 +45,7 @@ cdef class PatchSplitter(BestObliqueSplitter):
self,
intp_t start,
intp_t end,
-double* weighted_n_node_samples
+float64_t* weighted_n_node_samples
) except -1 nogil:
"""Reset splitter on node samples[start:end].
@@ -58,7 +58,7 @@ cdef class PatchSplitter(BestObliqueSplitter):
The index of the first sample to consider
end : intp_t
The index of the last sample to consider
-weighted_n_node_samples : ndarray, dtype=double pointer
+weighted_n_node_samples : ndarray, dtype=float64_t pointer
The total weight of those samples
"""

@@ -122,10 +122,10 @@ cdef class BestPatchSplitter(BaseDensePatchSplitter):
Criterion criterion,
intp_t max_features,
intp_t min_samples_leaf,
-double min_weight_leaf,
+float64_t min_weight_leaf,
object random_state,
const cnp.int8_t[:] monotonic_cst,
-double feature_combinations,
+float64_t feature_combinations,
const intp_t[:] min_patch_dims,
const intp_t[:] max_patch_dims,
const cnp.uint8_t[:] dim_contiguous,
14 changes: 7 additions & 7 deletions sktree/tree/unsupervised/_unsup_criterion.pxd
@@ -33,13 +33,13 @@ cdef class UnsupervisedCriterion(BaseCriterion):
# the left and right node. For example, this can then efficiently compute the
# mean of the node, and left/right child by subtracting relevant Xf elements
# and then dividing by the total number of samples in the node and left/right child.
-cdef double sum_total # The sum of the weighted count of each feature.
-cdef double sum_left # Same as above, but for the left side of the split
-cdef double sum_right # Same as above, but for the right side of the split
+cdef float64_t sum_total # The sum of the weighted count of each feature.
+cdef float64_t sum_left # Same as above, but for the left side of the split
+cdef float64_t sum_right # Same as above, but for the right side of the split

-cdef double sumsq_total # The sum of the squared weighted count of each feature.
-cdef double sumsq_left # Same as above, but for the left side of the split
-cdef double sumsq_right # Same as above, but for the right side of the split
+cdef float64_t sumsq_total # The sum of the squared weighted count of each feature.
+cdef float64_t sumsq_left # Same as above, but for the left side of the split
+cdef float64_t sumsq_right # Same as above, but for the right side of the split

# Methods
# -------
@@ -50,7 +50,7 @@ cdef class UnsupervisedCriterion(BaseCriterion):
self,
const float32_t[:] feature_values,
const float64_t[:] sample_weight,
-double weighted_n_samples,
+float64_t weighted_n_samples,
const intp_t[:] samples,
) except -1 nogil

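The comments in this hunk describe the criterion's bookkeeping: keeping running `sum_*` and `sumsq_*` accumulators lets node and child means/variances be updated by subtraction as the split position moves, with no rescan of the feature values. A small unweighted Python illustration of that identity:

```python
import numpy as np

x = np.array([1.0, 4.0, 2.0, 8.0, 5.0])     # feature values in one node
sum_total, sumsq_total = x.sum(), (x * x).sum()

pos = 2                                      # left = x[:2], right = x[2:]
sum_left, sumsq_left = x[:pos].sum(), (x[:pos] ** 2).sum()
sum_right = sum_total - sum_left             # right side by subtraction
sumsq_right = sumsq_total - sumsq_left

n_right = len(x) - pos
# Variance from running sums: E[x^2] - E[x]^2, no second pass needed.
var_right = sumsq_right / n_right - (sum_right / n_right) ** 2
assert np.isclose(var_right, x[pos:].var())
```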