-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path3.m
108 lines (81 loc) · 2.97 KB
/
3.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
% Load the training set and visualize the two numeric predictors
% (column 2 = weight, column 3 = time), color-coded by the class
% label in column 4.
clear ; close all; clc
% Only the numeric block is needed; the original captured xlsread's
% text output into an unused variable.
data = xlsread('train.xlsx');
figure(1);  % figure handle was unused, so it is not stored
gscatter(data(:,2), data(:,3), data(:,4),'rgb','osd');
xlabel('weight');
ylabel('time');
%Linear Discriminant Analysis (LDA)
lda = fitcdiscr(data(:,2:3),data(:,4));
ldaClass = resubPredict(lda);
%LDA resubstitution error
ldaResubErr = resubLoss(lda)
%ldaResubErr =0.0758
% confusion matrix
ldaResubCM = confusionmat(data(:,4),ldaClass);
%ldaResubCM =[141,6,0;18,79,0;0,14,243]
% Visualize the LDA decision regions over a dense grid spanning the
% predictor ranges (weight 35..70, time 0..8).
figure(2)
[x,y] = meshgrid(35:.1:70,0:.1:8);
x = x(:);
y = y(:);
% Use predict() on the already-fitted model instead of the deprecated
% classify() function; classify's default 'linear' type matches the
% default fitcdiscr model, so the labels are identical. This is also
% consistent with figures 3-5, which use predict().
j = predict(lda,[x y]);
gscatter(x,y,j,'rgb','osd')
%Quadratic Discriminant Analysis (QDA)
% Same predictors/labels as the LDA model, but with per-class covariance.
qda = fitcdiscr(data(:,2:3), data(:,4),'DiscrimType','quadratic');
%QDA resubstitution error
% No semicolon: the value is deliberately echoed to the console.
qdaResubErr = resubLoss(qda) %qdaResubErr =0.0758
% Fix the random seed so the cross-validation partition is reproducible.
rng(0,'twister');
% 10-fold partition built from the class labels (cvpartition stratifies
% by group when given a grouping vector); echoed to show fold sizes.
cp = cvpartition(data(:,4),'KFold',10)
%K-fold cross validation partition
%cp.NumTestSets=10
%cp.TrainSize=[451,450,451,451,451,451,451,451,451,451]
%cp.TestSize=[50,51,50,50,50,50,50,50,50,50]
%cp.NumObservations=501
% Cross-validated misclassification error for LDA...
cvlda = crossval(lda,'CVPartition',cp);
ldaCVErr = kfoldLoss(cvlda) %ldaCVErr =0.0778
% ...and for QDA, using the same partition for a fair comparison.
cvqda = crossval(qda,'CVPartition',cp);
qdaCVErr = kfoldLoss(cvqda) %qdaCVErr =0.0778
%Naive Bayes Classifiers
% Default fitcnb models each predictor with a per-class normal distribution.
nbGau = fitcnb(data(:,2:3), data(:,4));
%Naive Bayes Gaussian distribution resubstitution error
nbGauResubErr = resubLoss(nbGau) %nbGauResubErr =0.0898
% Cross-validate on the same partition used for LDA/QDA.
nbGauCV = crossval(nbGau, 'CVPartition',cp);
nbGauCVErr = kfoldLoss(nbGauCV) %nbGauCVErr =0.0918
% Decision regions of the Gaussian NB model over the same grid as figure 2.
figure(3)
labels = predict(nbGau, [x y]);
gscatter(x,y,labels,'rgb','osd')
% using a kernel density estimation
% Box-kernel density estimate per predictor instead of assuming normality.
nbKD = fitcnb(data(:,2:3), data(:,4), 'DistributionNames','kernel', 'Kernel','box');
nbKDResubErr = resubLoss(nbKD) %nbKDResubErr =0.0918
nbKDCV = crossval(nbKD, 'CVPartition',cp);
nbKDCVErr = kfoldLoss(nbKDCV) %nbKDCVErr =0.0938
% Decision regions of the kernel-density NB model.
figure(4)
labels = predict(nbKD, [x y]);
gscatter(x,y,labels,'rgb','osd')
%Decision Tree
t = fitctree(data(:,2:3),data(:,4),'PredictorNames',{'W' 'T' });
% Only the predicted labels are needed; the node-index output was unused.
grpname = predict(t,[x y]);
figure(5)
% Use the same color/marker order as figures 1-4 ('rgb','osd') so each
% class keeps a consistent color across all plots (was 'grb','sod').
gscatter(x,y,grpname,'rgb','osd')
view(t,'Mode','graph');
dtResubErr = resubLoss(t) %dtResubErr =0.0399
cvt = crossval(t,'CVPartition',cp);
dtCVErr = kfoldLoss(cvt) %dtCVErr =0.0958
% Compare resubstitution vs cross-validated cost across all pruning levels.
resubcost = resubLoss(t,'Subtrees','all');
[cost,secost,ntermnodes,bestlevel] = cvloss(t,'Subtrees','all');
plot(ntermnodes,cost,'b-', ntermnodes,resubcost,'r--')
figure(gcf);
xlabel('Number of terminal nodes');
ylabel('Cost (misclassification error)')
legend('Cross-validation','Resubstitution')
% "best" tree level with minimum cost plus one standard error
[mincost,minloc] = min(cost);
cutoff = mincost + secost(minloc);
hold on
plot([0 20], [cutoff cutoff], 'k:')
% bestlevel is 0-based (0 = unpruned), so offset by 1 to index cost.
plot(ntermnodes(bestlevel+1), cost(bestlevel+1), 'mo')
legend('Cross-validation','Resubstitution','Min + 1 std. err.','Best choice')
hold off
% pruned tree
pt = prune(t,'Level',bestlevel);
view(pt,'Mode','graph')
cost(bestlevel+1) %cost=0.1018