Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT Update submodule commit and remove cnp imports where possible #249

Merged
merged 13 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sktree/_lib/sklearn_fork
Submodule sklearn_fork updated 158 files
96 changes: 87 additions & 9 deletions sktree/tree/_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from scipy.sparse import issparse
from sklearn.base import ClusterMixin, TransformerMixin
from sklearn.cluster import AgglomerativeClustering
from sklearn.utils import check_random_state
from sklearn.utils._param_validation import Interval
from sklearn.utils.validation import check_is_fitted

Expand Down Expand Up @@ -918,6 +919,7 @@ def _build_tree(
Controls the randomness of the estimator.
"""
monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
_, n_features = X.shape

if self.feature_combinations is None:
Expand Down Expand Up @@ -963,7 +965,7 @@ def _build_tree(

# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
self.builder_ = DepthFirstTreeBuilder(
builder = DepthFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -972,7 +974,7 @@ def _build_tree(
self.min_impurity_decrease,
)
else:
self.builder_ = BestFirstTreeBuilder(
builder = BestFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -982,7 +984,7 @@ def _build_tree(
self.min_impurity_decrease,
)

self.builder_.build(self.tree_, X, y, sample_weight, None)
builder.build(self.tree_, X, y, sample_weight, None)

if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
Expand All @@ -996,6 +998,78 @@ def _inheritable_fitted_attribute(self):
"feature_combinations_",
]

def _update_tree(self, X, y, sample_weight):
    """Re-create a tree builder from the fitted state and run it on ``self.tree_``.

    The criterion, splitter and builder are constructed afresh from the
    attributes stored on the estimator, because the builder is not saved
    as part of the class and would otherwise be lost on pickling.
    The builder's node queue is then initialized from the existing tree,
    the builder is run, and the tree is pruned.

    Parameters
    ----------
    X : array-like
        Input data. Must be dense; sparse input is rejected.
    y : array-like
        Targets, forwarded unchanged to the builder.
    sample_weight : array-like
        Sample weights, forwarded unchanged to the builder.

    Returns
    -------
    self : object
        The estimator itself.

    Raises
    ------
    ValueError
        If ``X`` is a scipy sparse matrix.
    """
    # Stopping parameters: a negative leaf cap selects the depth-first builder
    # below, and a missing depth limit falls back to the largest int32.
    leaf_cap = self.max_leaf_nodes if self.max_leaf_nodes is not None else -1
    split_floor = self.min_samples_split_
    leaf_floor = self.min_samples_leaf_
    weight_floor = self.min_weight_leaf_
    if self.max_depth is None:
        depth_cap = np.iinfo(np.int32).max
    else:
        depth_cap = self.max_depth

    constraints = self.monotonic_cst_

    # Resolve the criterion: either instantiate it from its registered name,
    # or work on a private copy of a user-supplied instance.
    if isinstance(self.criterion, BaseCriterion):
        # Deep copy in case the criterion has mutable attributes that might be
        # shared and modified concurrently during parallel fitting.
        criterion = copy.deepcopy(self.criterion)
    else:
        criterion = CRITERIA_CLF[self.criterion](self.n_outputs_, self._n_classes_)

    rng = check_random_state(self.random_state)

    splitter = self.splitter
    if issparse(X):
        raise ValueError(
            "Sparse input is not supported for oblique trees. "
            "Please convert your data to a dense array."
        )

    # A splitter given by name is looked up in the dense oblique registry;
    # an already-constructed ObliqueSplitter is used as-is.
    if not isinstance(splitter, ObliqueSplitter):
        splitter = OBLIQUE_DENSE_SPLITTERS[self.splitter](
            criterion,
            self.max_features_,
            leaf_floor,
            weight_floor,
            rng,
            constraints,
            self.feature_combinations_,
        )

    # Both builders share the same leading arguments; only the best-first
    # builder additionally takes the leaf cap.
    shared_args = (splitter, split_floor, leaf_floor, weight_floor, depth_cap)
    if leaf_cap >= 0:
        grower = BestFirstTreeBuilder(
            *shared_args,
            leaf_cap,
            self.min_impurity_decrease,
            self.store_leaf_values,
        )
    else:
        grower = DepthFirstTreeBuilder(
            *shared_args,
            self.min_impurity_decrease,
            self.store_leaf_values,
        )

    grower.initialize_node_queue(self.tree_, X, y, sample_weight)
    grower.build(self.tree_, X, y, sample_weight)

    self._prune_tree()
    return self


class ObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor):
"""An oblique decision tree Regressor.
Expand Down Expand Up @@ -1785,6 +1859,7 @@ def _build_tree(
)

monotonic_cst = None
self.monotonic_cst_ = monotonic_cst

# Build tree
criterion = self.criterion
Expand Down Expand Up @@ -1825,7 +1900,7 @@ def _build_tree(

# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
self.builder_ = DepthFirstTreeBuilder(
builder = DepthFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -1834,7 +1909,7 @@ def _build_tree(
self.min_impurity_decrease,
)
else:
self.builder_ = BestFirstTreeBuilder(
builder = BestFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -1844,7 +1919,7 @@ def _build_tree(
self.min_impurity_decrease,
)

self.builder_.build(self.tree_, X, y, sample_weight, None)
builder.build(self.tree_, X, y, sample_weight, None)

if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
Expand Down Expand Up @@ -2263,6 +2338,7 @@ def _build_tree(
)

monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
n_samples = X.shape[0]

# Build tree
Expand Down Expand Up @@ -2692,6 +2768,7 @@ def _build_tree(
Controls the randomness of the estimator.
"""
monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
_, n_features = X.shape

if self.feature_combinations is None:
Expand Down Expand Up @@ -2737,7 +2814,7 @@ def _build_tree(

# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
self.builder_ = DepthFirstTreeBuilder(
builder = DepthFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -2746,7 +2823,7 @@ def _build_tree(
self.min_impurity_decrease,
)
else:
self.builder_ = BestFirstTreeBuilder(
builder = BestFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -2756,7 +2833,7 @@ def _build_tree(
self.min_impurity_decrease,
)

self.builder_.build(self.tree_, X, y, sample_weight, None)
builder.build(self.tree_, X, y, sample_weight, None)

if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
Expand Down Expand Up @@ -3088,6 +3165,7 @@ def _build_tree(
Controls the randomness of the estimator.
"""
monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
n_samples, n_features = X.shape

if self.feature_combinations is None:
Expand Down
4 changes: 3 additions & 1 deletion sktree/tree/_honest_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,8 +742,10 @@ def _inherit_estimator_attributes(self):
self.tree_ = self.estimator_.tree_

# XXX: scikit-learn trees do not store their builder, or min_samples_split_
self.builder_ = getattr(self.estimator_, "builder_", None)
self.min_samples_split_ = getattr(self.estimator_, "min_samples_split_", None)
self.min_samples_leaf_ = getattr(self.estimator_, "min_samples_leaf_", None)
self.min_weight_leaf_ = getattr(self.estimator_, "min_weight_leaf_", None)
self.monotonic_cst_ = getattr(self.estimator_, "monotonic_cst_", None)

def _empty_leaf_correction(self, proba, pos=0):
"""Leaves with empty posteriors are assigned values.
Expand Down
3 changes: 1 addition & 2 deletions sktree/tree/_marginal.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ import numpy as np
cimport numpy as cnp

from .._lib.sklearn.tree._tree cimport BaseTree, Node
from .._lib.sklearn.tree._utils cimport UINT32_t
from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t
from .._lib.sklearn.utils._typedefs cimport float32_t, float64_t, intp_t, uint32_t


cpdef apply_marginal_tree(
Expand Down
6 changes: 3 additions & 3 deletions sktree/tree/_marginal.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ cpdef apply_marginal_tree(
cdef intp_t n_marginals = marginal_indices.shape[0]

# sklearn_rand_r random number state
cdef UINT32_t rand_r_state = random_state.randint(0, RAND_R_MAX)
cdef uint32_t rand_r_state = random_state.randint(0, RAND_R_MAX)

# define a set of all marginal indices
cdef unordered_set[intp_t] marginal_indices_map
Expand Down Expand Up @@ -108,7 +108,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
unordered_set[intp_t] marginal_indices_map,
intp_t traversal_method,
unsigned char use_sample_weight,
UINT32_t* rand_r_state
uint32_t* rand_r_state
):
"""Finds the terminal region (=leaf node) for each sample in X.

Expand All @@ -131,7 +131,7 @@ cdef inline cnp.ndarray _apply_dense_marginal(
use_sample_weight : unsigned char
Whether or not to use the weighted number of samples
in each node.
rand_r_state : UINT32_t
rand_r_state : uint32_t
The random number state.
"""
# Extract input
Expand Down
83 changes: 80 additions & 3 deletions sktree/tree/_multiview.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import numpy as np
from scipy.sparse import issparse
from sklearn.utils import check_random_state
from sklearn.utils._param_validation import Interval, RealNotInt, StrOptions

from .._lib.sklearn.tree import DecisionTreeClassifier, _criterion
Expand Down Expand Up @@ -360,6 +361,7 @@
Controls the randomness of the estimator.
"""
monotonic_cst = None
self.monotonic_cst_ = monotonic_cst
_, n_features = X.shape

self.feature_combinations_ = 1
Expand Down Expand Up @@ -495,7 +497,7 @@

# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
self.builder_ = DepthFirstTreeBuilder(
builder = DepthFirstTreeBuilder(
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -504,7 +506,7 @@
self.min_impurity_decrease,
)
else:
self.builder_ = BestFirstTreeBuilder(
builder = BestFirstTreeBuilder(

Check warning on line 509 in sktree/tree/_multiview.py

View check run for this annotation

Codecov / codecov/patch

sktree/tree/_multiview.py#L509

Added line #L509 was not covered by tests
splitter,
min_samples_split,
min_samples_leaf,
Expand All @@ -514,12 +516,87 @@
self.min_impurity_decrease,
)

self.builder_.build(self.tree_, X, y, sample_weight, None)
builder.build(self.tree_, X, y, sample_weight, None)

if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
self.classes_ = self.classes_[0]

def _update_tree(self, X, y, sample_weight):
    """Re-create a tree builder from the fitted state and run it on ``self.tree_``.

    The criterion, splitter and builder are constructed afresh from the
    attributes stored on the estimator, because the builder is not saved
    as part of the class and would otherwise be lost on pickling.
    The builder's node queue is then initialized from the existing tree,
    the builder is run, and the tree is pruned.

    Parameters
    ----------
    X : array-like
        Input data. Must be dense; sparse input is rejected.
    y : array-like
        Targets, forwarded unchanged to the builder.
    sample_weight : array-like
        Sample weights, forwarded unchanged to the builder.

    Returns
    -------
    self : object
        The estimator itself.

    Raises
    ------
    ValueError
        If ``X`` is a scipy sparse matrix.
    """
    # Stopping parameters: a negative leaf cap selects the depth-first builder
    # below, and a missing depth limit falls back to the largest int32.
    leaf_cap = self.max_leaf_nodes if self.max_leaf_nodes is not None else -1
    split_floor = self.min_samples_split_
    leaf_floor = self.min_samples_leaf_
    weight_floor = self.min_weight_leaf_
    if self.max_depth is None:
        depth_cap = np.iinfo(np.int32).max
    else:
        depth_cap = self.max_depth

    constraints = self.monotonic_cst_

    # Resolve the criterion: either instantiate it from its registered name,
    # or work on a private copy of a user-supplied instance.
    if isinstance(self.criterion, BaseCriterion):
        # Deep copy in case the criterion has mutable attributes that might be
        # shared and modified concurrently during parallel fitting.
        # NOTE(review): Codecov (codecov/patch) reported this added line
        # (sktree/tree/_multiview.py#L546) as not covered by tests.
        criterion = copy.deepcopy(self.criterion)
    else:
        criterion = CRITERIA_CLF[self.criterion](self.n_outputs_, self._n_classes_)

    rng = check_random_state(self.random_state)

    splitter = self.splitter
    if issparse(X):
        # NOTE(review): Codecov (codecov/patch) reported this added line
        # (sktree/tree/_multiview.py#L552) as not covered by tests.
        raise ValueError(
            "Sparse input is not supported for oblique trees. "
            "Please convert your data to a dense array."
        )

    # A splitter given by name is looked up in the dense registry;
    # an already-constructed ObliqueSplitter is used as-is.
    if not isinstance(splitter, ObliqueSplitter):
        splitter = DENSE_SPLITTERS[self.splitter](
            criterion,
            self.max_features_,
            leaf_floor,
            weight_floor,
            rng,
            constraints,
            self.feature_combinations_,
            self.feature_set_ends_,
            self.n_feature_sets_,
            self.max_features_per_set_,
        )

    # Both builders share the same leading arguments; only the best-first
    # builder additionally takes the leaf cap.
    shared_args = (splitter, split_floor, leaf_floor, weight_floor, depth_cap)
    if leaf_cap >= 0:
        # NOTE(review): Codecov (codecov/patch) reported this added line
        # (sktree/tree/_multiview.py#L584) as not covered by tests.
        grower = BestFirstTreeBuilder(
            *shared_args,
            leaf_cap,
            self.min_impurity_decrease,
            self.store_leaf_values,
        )
    else:
        grower = DepthFirstTreeBuilder(
            *shared_args,
            self.min_impurity_decrease,
            self.store_leaf_values,
        )

    grower.initialize_node_queue(self.tree_, X, y, sample_weight)
    grower.build(self.tree_, X, y, sample_weight)

    self._prune_tree()
    return self

def fit(self, X, y, sample_weight=None, check_input=True, classes=None):
"""Build a decision tree classifier from the training set (X, y).

Expand Down
Loading
Loading