-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNeural_Network_Code.py
152 lines (122 loc) · 5.24 KB
/
Neural_Network_Code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 27 11:44:24 2017
@author: Abdullah Mobeen
"""
import math
import time
def neuron(prev_act, input_weights):
    """Single neuron unit.

    Computes the weighted sum of the previous activations and applies the
    logistic sigmoid function to it.

    Args:
        prev_act: sequence of activations coming from the previous layer.
        input_weights: sequence of weights, indexed in step with prev_act.
            It must have at least ``len(prev_act)`` entries; any extra
            entries are ignored.

    Returns:
        The new activation value, a float between 0 and 1.

    Raises:
        IndexError: if input_weights is shorter than prev_act.
    """
    z = 0
    for i, activation in enumerate(prev_act):
        z += activation * input_weights[i]
    # Pick the sigmoid form that only ever exponentiates a non-positive
    # number: math.exp(-z) raises OverflowError once z drops below about -709.
    if z >= 0:
        return 1 / (1 + math.exp(-z))
    exp_z = math.exp(z)
    return exp_z / (1 + exp_z)
def files(data):
    """Read a comma-separated file of numbers.

    Args:
        data: name (path) of the file to read.

    Returns:
        A tuple ``(rows, count)`` where ``rows`` is a list with one list of
        floats per non-blank line of the file and ``count`` is ``len(rows)``.

    Raises:
        ValueError: if a field cannot be converted to float.
    """
    rows = []
    with open(data, 'r') as f:
        for line in f:
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no data; skip it instead of failing on float('').
            if not line.strip():
                continue
            # Convert every field of this row, whatever its length, rather
            # than assuming all rows are as long as the first one.
            rows.append([float(value) for value in line.split(',')])
    return rows, len(rows)
def data_processing():
    """Load every data file used by the classifier.

    Reads 'ps5_data.csv', 'ps5_data-labels.csv', 'ps5_theta1.csv' and
    'ps5_theta2.csv' from the current working directory.

    Returns:
        A tuple ``(weights, total_nodes, weights1, total_nodes1, x_vector,
        labels)``:
        weights      - rows of 'ps5_theta1.csv' (input -> hidden weights)
        total_nodes  - number of rows in weights (hidden-layer nodes)
        weights1     - rows of 'ps5_theta2.csv' (hidden -> output weights)
        total_nodes1 - number of rows in weights1 (output-layer nodes)
        x_vector     - one list per image: a 1.0 bias followed by its pixels
        labels       - one int per image: the label from the file minus 1
    """
    bias = [1.0]
    # Prepend the bias unit to every image actually present in the file,
    # instead of assuming there are exactly 5000 of them.
    x_vector = [bias + pixels for pixels in files('ps5_data.csv')[0]]
    with open('ps5_data-labels.csv', 'r') as f:
        # Labels are shifted down by one so they can be compared with the
        # 0-based index of the winning output unit; blank lines are skipped.
        labels = [int(line) - 1 for line in f if line.strip()]
    # Per the original author's notes: 25 lists of 401 weights each.
    weights, total_nodes = files('ps5_theta1.csv')
    # Per the original author's notes: 10 lists of 26 weights each.
    weights1, total_nodes1 = files('ps5_theta2.csv')
    return weights, total_nodes, weights1, total_nodes1, x_vector, labels
def forward_propogation(x):
    """Run one image vector through the network.

    Reads the module-level ``weights``, ``total_nodes``, ``weights1`` and
    ``total_nodes1``. Returns the list of output-unit activations, one per
    output node.
    """
    # Hidden layer: a leading 1.0 bias unit followed by one activation per
    # hidden node.
    hidden_layer = [1.0] + [
        neuron(x, weights[node]) for node in range(total_nodes)
    ]
    # Output layer: one activation per output node, fed by the hidden layer.
    return [
        neuron(hidden_layer, weights1[node]) for node in range(total_nodes1)
    ]
def image_classifier(x):
    """Classify a single image vector.

    Returns the index of the output unit with the largest activation
    (the first such index when several are tied).
    """
    outputs = forward_propogation(x)
    return max(range(len(outputs)), key=outputs.__getitem__)
def classification():
    """Classify every image in the module-level ``x``.

    Compares each prediction with the matching entry of the module-level
    ``labels`` and returns the error rate as a percentage.
    """
    predictions = [image_classifier(image) for image in x]
    # Count the positions where the prediction disagrees with the label.
    mistakes = sum(
        1 for position, guess in enumerate(predictions)
        if guess != labels[position]
    )
    return (mistakes / len(labels)) * 100
def label_normalization(labels, num_classes=10):
    """Turn integer labels into one-hot vectors.

    For example, with the default of 10 classes, the label 0 becomes
    [1,0,0,0,0,0,0,0,0,0].

    Args:
        labels: sequence of integer labels, each a valid index into a
            vector of ``num_classes`` entries.
        num_classes: length of every produced vector (defaults to 10).

    Returns:
        A new list holding one vector per label. The input sequence is
        left untouched, so it can still be used as plain labels afterwards
        and this function can safely be called on it again.

    Raises:
        IndexError: if a label is out of range for ``num_classes``.
    """
    encoded = []
    for label in labels:
        vector = [0] * num_classes
        vector[label] = 1  # switch on the unit that stands for this label
        encoded.append(vector)
    return encoded
def cost_function(labels, x):
    """Compute the unregularized cross-entropy (MLE) cost of the network.

    Args:
        labels: sequence of integer labels, one per image.
        x: sequence of image vectors; ``x[i]`` belongs to ``labels[i]``.

    Returns:
        The cost averaged over all labelled images, or 0.0 when there are
        no labels.
    """
    # Hand the helper a copy so the caller's label list is never rewritten.
    new_labels = label_normalization(list(labels))
    # Network outputs for every labelled image.
    predicted_labels = [forward_propogation(x[i]) for i in range(len(new_labels))]
    if not predicted_labels:
        return 0.0
    # Keep predictions strictly inside (0, 1): a sigmoid output can round to
    # exactly 0.0 or 1.0, and math.log(0) raises a math domain error.
    eps = 1e-15
    cost = 0
    for target, predicted in zip(new_labels, predicted_labels):
        for k, y in enumerate(target):
            p = min(max(predicted[k], eps), 1.0 - eps)
            cost += float(-y) * math.log(p) \
                - float(1.0 - y) * math.log(1.0 - p)
    cost = cost * (1 / len(predicted_labels))
    # NOTE: the usual regularization term (sum of all squared weights in both
    # layers, divided by twice the number of examples) is intentionally not
    # added here.
    return cost
if __name__ == "__main__":
    # These stay module-level names on purpose: the functions above read
    # them as globals.
    (weights, total_nodes,
     weights1, total_nodes1,
     x, labels) = data_processing()

    # Time only the classification pass, not the data loading.
    start_time = time.time()
    error_rate = classification()
    print("The error rate when classifying 5000 images: ", error_rate, "%\n")
    elapsed = round(time.time() - start_time, 2)
    print("Total time taken for classification: ", elapsed, 'seconds\n')

    cost = round(cost_function(labels, x), 4)
    print("The cost function correct to 4 d.p: ", cost)