-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathDecisionTree_Basics.R
50 lines (41 loc) · 1.39 KB
/
DecisionTree_Basics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Decision tree basics
# Regression: predicting the values of a continuous dependent variable.
# Classification: predicting the values of a categorical dependent variable.
# Pruning: a technique used to determine the size of the tree.
#install.packages("rpart")
#install.packages("rpart.plot")
library("rpart")
library("rpart.plot")
# Read dataset ----
# Load the built-in iris data set and keep a working copy under a
# script-specific name.
data("iris")
data_iris <- iris
# View() opens a GUI data viewer, so only call it in an interactive session;
# this lets the script also run via Rscript or on a machine without a display.
if (interactive()) {
  View(data_iris)
}
# Explore the structure of the data set (column names, types, first values).
str(data_iris)
# Create training dataset and testing dataset ----
# Fix the random seed so the train/test split (and therefore every tree and
# prediction built from it) is the same on each run of the script.
set.seed(123)
# Number of rows drawn for training; the remaining rows form the test set.
n_train <- 110L
# Sample row positions without replacement from however many rows iris has,
# rather than hard-coding the row count.
indexes <- sample(nrow(iris), n_train)
iris_train <- iris[indexes, ]
# Negative indexing drops the training rows, leaving the held-out rows.
iris_test <- iris[-indexes, ]
# View() needs a GUI; skip it when the script is run non-interactively.
if (interactive()) {
  View(iris_train)
  View(iris_test)
}
# Build and plot models ----
# Classification tree: Species (a factor) explained by all other columns of
# the training data.
ctree <- rpart(
  formula = Species ~ .,
  data = iris_train,
  method = "class"
)
rpart.plot(ctree)

# Regression tree: Sepal.Length (numeric) explained by all other columns of
# the training data; "anova" is rpart's method for a continuous response.
rtree <- rpart(
  formula = Sepal.Length ~ .,
  data = iris_train,
  method = "anova"
)
rpart.plot(rtree)

# Print the text representation of the classification tree to the console.
print(ctree)
# Predicting ----
# Classification tree: predict the species of each flower in the test set.
# For a "class" tree predict() returns a matrix of per-class probabilities by
# default; type = "class" returns the predicted label as a factor, which is
# what a single prediction column should hold.
iris_test$Prediction_Species <- predict(
  ctree,
  newdata = iris_test,
  type = "class"
)
# View() needs a GUI; skip it when the script is run non-interactively.
if (interactive()) {
  View(iris_test)
}
# Regression tree: predict the sepal length of each flower in the test set.
iris_test$Prediction_Sepal.Length <- predict(rtree, newdata = iris_test)
if (interactive()) {
  View(iris_test)
}
# Pruning ----
# Limit tree size while it is grown: minsplit is the minimum number of
# observations a node must contain before a split is attempted. A small value
# lets the tree grow deep; a large value stops splitting early.
tree_ms3 <- rpart(
  Species ~ .,
  data = iris_train,
  method = "class",
  control = rpart.control(minsplit = 3)
)
tree_ms100 <- rpart(
  Species ~ .,
  data = iris_train,
  method = "class",
  control = rpart.control(minsplit = 100)
)
# Draw both trees side by side. par() returns the previous settings, which
# are restored afterwards so later plots are not forced into a 1 x 2 layout.
old_par <- par(mfcol = c(1, 2))
rpart.plot(tree_ms3, main = "minsplit=3")
rpart.plot(tree_ms100, main = "minsplit=100")
par(old_par)