Skip to content

Commit

Permalink
MCAR for decision tree
Browse files Browse the repository at this point in the history
Signed-off-by: Adam Li <adam2392@gmail.com>
  • Loading branch information
adam2392 committed Apr 4, 2024
1 parent 64738a3 commit 5f0ac4c
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions sklearn/tree/tests/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -2602,6 +2602,35 @@ def test_missing_value_is_predictive():
assert tree.score(X_test, y_test) >= 0.85


def test_missing_value_is_not_predictive_with_mcar():
    """Check the tree does not learn from missingness when MCAR is forced.

    One feature gets missing values whose pattern follows ``y`` except on a
    random ~10% of samples, while every other feature is pure noise. Two trees
    are fit on the same split, one with ``missing_car=True`` and one with
    ``missing_car=False``, and the latter must score clearly higher on the
    held-out data.

    NOTE(review): presumably ``missing_car=True`` makes the splitter treat
    missing values as missing completely at random, so it cannot exploit the
    missingness pattern -- confirm against the estimator's documentation.
    """
    rng = np.random.RandomState(0)
    n_samples = 1000

    X = rng.standard_normal(size=(n_samples, 10))
    y = rng.randint(0, high=2, size=n_samples)

    # Create a predictive feature using `y` and with some noise: the
    # missingness mask equals `y`, flipped on a random ~10% of the samples.
    X_random_mask = rng.choice([False, True], size=n_samples, p=[0.9, 0.1])
    y_mask = y.copy().astype(bool)
    y_mask[X_random_mask] = ~y_mask[X_random_mask]

    X_predictive = rng.standard_normal(size=n_samples)
    X_predictive[y_mask] = np.nan

    X[:, 5] = X_predictive

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)
    tree = DecisionTreeClassifier(random_state=rng, missing_car=True).fit(
        X_train, y_train
    )
    non_mcar_tree = DecisionTreeClassifier(random_state=rng, missing_car=False).fit(
        X_train, y_train
    )

    # The comparison must be asserted; as a bare expression its result is
    # discarded and the test would pass unconditionally.
    non_mcar_score = non_mcar_tree.score(X_test, y_test)
    mcar_score = tree.score(X_test, y_test)
    assert non_mcar_score > mcar_score + 0.2


@pytest.mark.parametrize(
"make_data, Tree",
[
Expand Down

0 comments on commit 5f0ac4c

Please sign in to comment.