# ML_Perceptron.R
#Name: Swaroop S Bhat
#Student Id: 17230755
#Class: 178-CT475 (MSc Data Analytics)
#********************************************************************************
#Installing the packages below is necessary if they are not already installed on the system
# install.packages("ggplot2")
# install.packages("dplyr")
# install.packages("lubridate")
# install.packages("readr")
# install.packages("stringr")
library("ggplot2")
library("dplyr")
library("lubridate")
library("readr")
library("stringr")
#Note: The program runs for approximately 4 minutes because training may take up to 1000 epochs (passes over the data until the error converges)
#********************************************* NOTE *************************************************
#Steps to run the program
#1. Set the R working directory to the directory where the data set is stored: setwd("path")
#2. Run the program. (Everything is parameterized, hence there is no need to call any functions.)
#Note: For convenience, the split of the data into training (2/3) and testing (1/3)
#is already done, and the calls to the training and prediction functions are already
#parameterized, so there is no need to call the functions explicitly.
#****************************************************************************************************
# Silence every warning from here on (warn = -1 turns warnings off).
options(warn = -1)
# Read the owl measurements; the CSV has no header row, so the five
# columns are named explicitly right after loading.
Data_set <- suppressMessages(read_csv("owls15.csv", col_names = FALSE))
names(Data_set) <- c(
  "Body_Length", "Wing_Length", "Body_Width", "Wing_Width", "Type"
)
# Wall-clock start, used for the elapsed-time message at the end of the script.
start_time <- Sys.time()
# One accuracy slot per repetition of the train/test experiment.
Estimated_Accuracy <- numeric(10)
for(k in 1:10)#10 random samples to estimate future accuracy
{
Perceptron_Alg = function(Data_set)
{
# ---- Data preparation -------------------------------------------------------
# Work on a local copy and treat the class label as a factor.
Owl_data <- Data_set
Owl_data$Type <- as.factor(Owl_data$Type)
# Min-max scaling of a numeric vector onto [0, 1].
# (Despite its name this is not a z-score.)
z_norm <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}
# Scale every column except column 5 (Type), then re-attach the label.
# as_tibble() replaces the deprecated as_data_frame(); lapply() keeps one
# numeric vector per column instead of relying on sapply()'s simplification.
Nrm_data <- as_tibble(lapply(Owl_data[, -5], z_norm))
Nrm_data$Type <- Owl_data$Type
# Exploring Data: scatter plots of the raw (unscaled) measurements by class.
print(
  ggplot(Owl_data) +
    geom_point(aes(x = Body_Length, y = Body_Width, colour = Type)) +
    labs(title = "Exploring Data", x = "Body Length", y = "Body Width") +
    theme(plot.title = element_text(hjust = 0.5))
)
print(
  ggplot(Owl_data) +
    geom_point(aes(x = Wing_Length, y = Wing_Width, colour = Type)) +
    labs(title = "Exploring Data", x = "Wing Length", y = "Wing Width") +
    theme(plot.title = element_text(hjust = 0.5))
)
# ---- Train / test split -----------------------------------------------------
# The seed depends on the loop counter k of the enclosing for loop, so every
# repetition draws a different but reproducible 2/3 - 1/3 split.
set.seed(k + 50)
n_rows <- nrow(Nrm_data)
index <- sample(seq_len(n_rows), size = n_rows * 2 / 3, replace = FALSE)
Test_data <- Nrm_data[-index, ] # held-out rows, used for validation
Train_data <- Nrm_data[index, ] # split further below, one subset per class
# Training rows of each class, needed for the one-vs-one classifiers.
C1_data <- Train_data[Train_data$Type == "LongEaredOwl", ]
C2_data <- Train_data[Train_data$Type == "SnowyOwl", ]
C3_data <- Train_data[Train_data$Type == "BarnOwl", ]
# ---- One-vs-one training sets -----------------------------------------------
# Stack the rows of two classes (first, then second -- the row order is the
# order in which the perceptron later sees the samples) and encode the label
# as +1 for `positive` and -1 for the other class.
make_pair <- function(first, second, positive) {
  pair <- rbind(first, second)
  list(
    features = pair[, -5],
    target = ifelse(pair$Type == positive, 1, -1)
  )
}
# LongEaredOwl (+1) vs SnowyOwl (-1)
pair1 <- make_pair(C1_data, C2_data, "LongEaredOwl")
Train_data1 <- pair1$features
desired_Op_Train1 <- pair1$target
# SnowyOwl (+1) vs BarnOwl (-1)
pair2 <- make_pair(C2_data, C3_data, "SnowyOwl")
Train_data2 <- pair2$features
desired_Op_Train2 <- pair2$target
# BarnOwl (+1) vs LongEaredOwl (-1)
pair3 <- make_pair(C1_data, C3_data, "BarnOwl")
Train_data3 <- pair3$features
desired_Op_Train3 <- pair3$target
# ---- Initial perceptron parameters ------------------------------------------
Default_Bias <- 0.03
lr_rate <- 1 # learning rate
Initial_wt <- c(0.01, 0.01, 0.01, 0.01)
# Perceptron Training
#
# Trains one binary perceptron with the online (sample-by-sample) update rule
# and a hard threshold: a sample is predicted +1 when sum(weights * x) + bias
# is greater than 0, otherwise -1.
#
# Args:
#   Train_data: data frame / tibble / matrix of numeric features, one row per
#               sample.
#   b:          initial bias.
#   w:          initial weight vector, one entry per feature column.
#   lr:         learning rate.
#   desired:    target labels, one per row of Train_data.
#   max_epochs: maximum number of passes over the data (default 1000).
#   tol:        training stops as soon as an epoch's RMSE is below this value
#               (default 0.02).
#
# Returns a list with
#   Bias_va         final bias,
#   Weight          final weight vector,
#   Predicted_value predictions recorded during the last epoch (each one made
#                   before that sample's weight update),
#   Actual_value    the `desired` labels,
#   RMSE_Epochs     training RMSE per epoch; the vector is preallocated with
#                   100 zeros, so entries after the last epoch stay 0 when
#                   fewer than 100 epochs were run,
#   Epochs          number of epochs actually run.
Train_Perc <- function(Train_data, b, w, lr, desired,
                       max_epochs = 1000, tol = 0.02) {
  # Convert once to a plain numeric matrix: pulling single rows out of a
  # data frame inside the loop is by far the most expensive step.
  features <- unname(as.matrix(Train_data))
  n_obs <- nrow(features)
  if (n_obs == 0) {
    stop("Train_data must contain at least one row", call. = FALSE)
  }
  stopifnot(is.numeric(features), length(desired) == n_obs)

  Bias <- b
  Weights <- w
  epoch_rmse <- numeric(100) # grows automatically beyond 100 epochs
  predicted <- numeric(n_obs)
  epoch <- 0

  repeat {
    errors <- numeric(n_obs)
    predicted <- numeric(n_obs)
    for (i in seq_len(n_obs)) {
      x <- features[i, ]
      activation <- sum(c(Weights * x, Bias))
      predicted[i] <- if (activation > 0) 1 else -1
      errors[i] <- desired[i] - predicted[i] # error = actual - predicted
      if (errors[i] != 0) {
        # Misclassified sample: move the hyperplane towards it.
        Bias <- Bias + lr * errors[i]
        Weights <- Weights + (errors[i] * lr * x)
      }
    }
    epoch <- epoch + 1
    rmse <- sqrt(sum(errors^2) / length(desired))
    epoch_rmse[epoch] <- rmse
    # Stop on convergence or when the epoch budget is used up.
    if (rmse < tol || epoch >= max_epochs) {
      break
    }
  }

  list(
    Bias_va = Bias,
    Weight = Weights,
    Predicted_value = predicted,
    Actual_value = desired,
    RMSE_Epochs = epoch_rmse,
    Epochs = epoch
  )
}
#*****************************************************************************
# Test data prediction
#
# Applies a trained perceptron to every row of `h` with the same hard
# threshold used during training: +1 when sum(W * x) + b is greater than 0,
# otherwise -1.
#
# Args:
#   h: data frame / tibble / matrix of numeric features, one row per sample.
#   W: weight vector, one entry per feature column.
#   b: bias.
#
# Returns list(Predicted = <numeric vector of +1 / -1, one per row of h>).
# An input with zero rows yields an empty Predicted vector.
Test <- function(h, W, b) {
  features <- unname(as.matrix(h))
  # seq_len() (rather than 1:nrow) keeps the zero-row case well defined.
  scores <- vapply(
    seq_len(nrow(features)),
    function(i) sum(c(W * features[i, ], b)),
    numeric(1)
  )
  # as.numeric() guarantees a numeric result even when `scores` is empty.
  list(Predicted = as.numeric(ifelse(scores > 0, 1, -1)))
}
# Voting system to predict the class from the three pairwise classifiers.
#
# Args (numeric vectors of +1 / -1, all of the same length):
#   Predicted1: output of the LongEaredOwl (+1) vs SnowyOwl (-1) classifier.
#   Predicted2: output of the SnowyOwl (+1) vs BarnOwl (-1) classifier.
#   Predicted3: output of the BarnOwl (+1) vs LongEaredOwl (-1) classifier.
#
# A class is assigned when both classifiers that involve it vote for it.
# The three conditions are mutually exclusive. When the votes form a cycle
# (all +1 or all -1) no class wins and the placeholder "0" is returned for
# that sample.
#
# Returns a character vector with one label per sample; its length is taken
# from the inputs themselves, so the function does not depend on any
# variable outside its arguments.
Pecp_Predicted_Class <- function(Predicted1, Predicted2, Predicted3) {
  stopifnot(
    length(Predicted1) == length(Predicted2),
    length(Predicted1) == length(Predicted3)
  )
  Predicted_class <- rep("0", length(Predicted1))
  # which() drops NA comparisons instead of propagating them into the index.
  Predicted_class[which(Predicted1 == 1 & Predicted3 == -1)] <- "LongEaredOwl"
  Predicted_class[which(Predicted1 == -1 & Predicted2 == 1)] <- "SnowyOwl"
  Predicted_class[which(Predicted2 == -1 & Predicted3 == 1)] <- "BarnOwl"
  Predicted_class
}
#**********************************************************************************
# Fit the three pairwise perceptrons on the training split (2/3 of the data),
# all starting from the same initial bias, weights and learning rate.
t <- Train_Perc(
  Train_data = Train_data1, b = Default_Bias, w = Initial_wt,
  lr = lr_rate, desired = desired_Op_Train1
)
e <- Train_Perc(
  Train_data = Train_data2, b = Default_Bias, w = Initial_wt,
  lr = lr_rate, desired = desired_Op_Train2
)
v <- Train_Perc(
  Train_data = Train_data3, b = Default_Bias, w = Initial_wt,
  lr = lr_rate, desired = desired_Op_Train3
)
# Score the held-out split (1/3 of the data, unseen during training) with each
# fitted classifier; column 5 (Type) is dropped so only features are passed.
Test_features <- Test_data[, -5]
t1 <- Test(h = Test_features, W = t$Weight, b = t$Bias_va)
t2 <- Test(h = Test_features, W = e$Weight, b = e$Bias_va)
t3 <- Test(h = Test_features, W = v$Weight, b = v$Bias_va)
# Plot smoothed training RMSE against epoch for the three pairwise
# classifiers, to see whether the error converges (it does when the two
# classes are linearly separable).
#
# Args:
#   a, b, c:  per-epoch RMSE vectors of the LongEared-vs-Snowy,
#             Snowy-vs-Barn and Barn-vs-LongEared classifiers.
#   n_epochs: number of leading epochs to draw (default 150). Positions
#             beyond the end of a vector are NA.
#
# Prints the plot and returns the value of print().
Learning <- function(a, b, c, n_epochs = 150) {
  epochs <- seq_len(n_epochs)
  print(
    ggplot() +
      geom_smooth(
        aes(x = epochs, y = a[epochs], colour = "LongEared vs SnowyOwl"),
        se = FALSE
      ) +
      geom_smooth(
        aes(x = epochs, y = b[epochs], colour = "SnowyOwl vs BarnOwl"),
        se = FALSE
      ) +
      geom_smooth(
        aes(x = epochs, y = c[epochs], colour = "BarnOwl vs LongearedOwl"),
        se = FALSE
      ) +
      ggtitle("Error vs Epoch") +
      xlab("Epochs") +
      ylab("RMSE_Train") +
      theme(plot.title = element_text(hjust = 0.5))
  )
}
# Plot the per-epoch training error of the three pairwise classifiers.
Learning(t$RMSE_Epochs, e$RMSE_Epochs, v$RMSE_Epochs)
# Majority-vote class for every test row, computed once and reused below.
Predicted_Class <- Pecp_Predicted_Class(t1$Predicted, t2$Predicted, t3$Predicted)
Actual_Class <- Test_data$Type
# Confusion matrix to find the misclassifications.
confusion_matrix <- table(Actual = Actual_Class, Predicted = Predicted_Class)
# Number of epochs a classifier actually ran. RMSE_Epochs is preallocated with
# 100 entries, so its length overstates short runs. Training stops at the first
# epoch whose RMSE is below 0.02 (the stop condition used by Train_Perc), so
# that position is the epoch count; if no entry is below 0.02 the epoch limit
# was hit and the vector length is the count.
epochs_run <- function(rmse) {
  converged <- which(rmse < 0.02)
  if (length(converged) > 0) converged[1] else length(rmse)
}
# Final parameters, accuracy and confusion matrix shown on screen.
op_list <- list(
  Predicted_Class = Predicted_Class,
  Actual_Class = Actual_Class,
  Confusion_Matrix = confusion_matrix,
  # Share of test rows whose voted class equals the true class. Comparing the
  # vectors directly (instead of indexing the table by class name) also works
  # when some class is never predicted and has no column in the table.
  Accuracy = mean(Predicted_Class == as.character(Actual_Class)),
  Classfier1_Weight = c(unlist(t$Weight), Bias = t$Bias_va),
  Classfier2_Weight = c(unlist(e$Weight), Bias = e$Bias_va),
  Classfier3_Weight = c(unlist(v$Weight), Bias = v$Bias_va),
  Epochs_class1 = epochs_run(t$RMSE_Epochs),
  Epochs_class2 = epochs_run(e$RMSE_Epochs),
  Epochs_class3 = epochs_run(v$RMSE_Epochs)
)
print(op_list)
# Store this repetition's accuracy in the script-level result vector
# (k is the loop counter of the enclosing for loop).
Estimated_Accuracy[k] <<- op_list$Accuracy
}
Perceptron_Alg(Data_set) #calling the function to train and test
}
# Mean Accuracy
# Explicit print(): a bare parenthesised expression is only auto-printed at
# the interactive top level, not when the script is run through source().
print(Estimated_Accuracy)
cat("Likely Expected Accuracy (mean) on prediction is:", mean(Estimated_Accuracy), "\n")
# format() keeps the unit (secs / mins) that difftime picks automatically;
# passing the difftime straight to cat() would print only the bare number.
cat("Time Elapsed: ", format(Sys.time() - start_time), "\n")