-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpearson.py
104 lines (85 loc) · 2.37 KB
/
pearson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import csv
import numpy as np
import copy
import math
# Module-level state shared by the clustering routines in this script.
items = []  # data points read from xy.csv, each stored as [x, y] floats
centroids = []  # current centroid coordinates, one [x, y] pair per cluster
closest = []  # contains the closest distance
closest_centroid = []  # cluster number
# Fixed seed so the randomly drawn initial centroids are reproducible.
np.random.seed(200)
k = int(input('Enter the value of k you want: '))
if k < 1:
    # With no centroids there is nothing to assign the points to; fail here
    # with a clear message instead of an unrelated error further down.
    raise ValueError('k must be a positive integer')
# opening csv file and storing the values of x and y
# newline='' is what the csv module documentation asks for on files handed
# to csv.reader.
with open('xy.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for line in csv_reader:
        if not line:
            # csv.reader yields an empty list for a blank line (for example
            # a trailing newline at the end of the file); skip it.
            continue
        items.append([float(line[0]), float(line[1])])
# initialising the centroids
# Each coordinate is drawn from the hard-coded range [0, 80).
for _ in range(k):
    centroids.append([np.random.randint(0, 80), np.random.randint(0, 80)])
def PearsonCorrelation(item, centroid):
    """Return the Pearson correlation coefficient of two equal-length vectors.

    Args:
        item: sequence of numbers.
        centroid: sequence of numbers with the same length as ``item``.

    Returns:
        The covariance sum divided by ``sqrt(dn1 * dn2)``, where ``dn1`` and
        ``dn2`` are the sums of squared deviations of each vector. When that
        denominator is 0 (at least one vector has no spread, so the
        coefficient is undefined) the function returns 1, which is the
        fallback value this function has always used for that situation.

    Raises:
        ValueError: if the vectors are empty or their lengths differ.
    """
    n = len(item)
    if n == 0 or len(centroid) != n:
        raise ValueError('item and centroid must be non-empty and of equal length')

    itemmean = 0
    cenmean = 0
    for item_value, cen_value in zip(item, centroid):
        itemmean += item_value
        cenmean += cen_value
    itemmean = itemmean / float(n)
    cenmean = cenmean / float(n)

    up = 0   # sum of products of deviations (numerator)
    dn1 = 0  # sum of squared deviations of item
    dn2 = 0  # sum of squared deviations of centroid
    for item_value, cen_value in zip(item, centroid):
        item_dev = item_value - itemmean
        cen_dev = cen_value - cenmean
        up += item_dev * cen_dev
        dn1 += math.pow(item_dev, 2)
        dn2 += math.pow(cen_dev, 2)

    denominator = math.sqrt(dn1 * dn2)
    if denominator == 0:
        return 1
    # Decide on the denominator, not on the numerator: a zero numerator with
    # a non-zero denominator means the vectors are uncorrelated, and the
    # result must then be 0 rather than the fallback value 1.
    return up / denominator
def Correlation(items,means):
    """Build the correlation table between every centroid and every item.

    Args:
        items: sequence of data points.
        means: sequence of centroids.

    Returns:
        A list with one row per centroid, in the order of ``means``. Row
        ``i`` holds ``PearsonCorrelation(item, means[i])`` for each item,
        in the order of ``items``.
    """
    return [
        [PearsonCorrelation(point, mean) for point in items]
        for mean in means
    ]
def assignment():
    """Assign every item to the centroid it correlates with most strongly.

    Reads the module-level ``Correlations`` table, where
    ``Correlations[i][j]`` is the correlation of item ``j`` with centroid
    ``i``, and writes the 1-based number of the best centroid into
    ``closest_centroid[j]`` in place. On a tie the lowest-numbered centroid
    wins, because only a strictly larger value replaces the current best.
    An entry is left untouched when no value in its column exceeds -2.
    """
    for j in range(len(closest_centroid)):
        # Start below -1 so that the first real correlation always wins.
        best = -2
        for i, row in enumerate(Correlations):
            if best < row[j]:
                best = row[j]
                closest_centroid[j] = i + 1
# First pass: mark every item as unassigned (0), score the items against the
# initial centroids, assign each item to a cluster and show the result.
closest_centroid = [0] * len(items)
Correlations = Correlation(items, centroids)
assignment()
print(closest_centroid)
def CalculateMean(items,clusters,i,j):
    """Return the mean of coordinate ``i`` over the items in cluster ``j + 1``.

    Args:
        items: sequence of data points, each indexable by ``i``.
        clusters: cluster label for each item, aligned with ``items`` by
            index; labels are 1-based.
        i: index of the coordinate to average.
        j: 0-based cluster index; items labelled ``j + 1`` are averaged.

    Returns:
        The arithmetic mean as a float, or -1 when no item carries the
        label. Note that a genuine mean of -1 cannot be told apart from
        this sentinel by the return value alone.
    """
    label = j + 1
    total = 0.0
    count = 0
    for index, item in enumerate(items):
        if clusters[index] == label:
            total += item[i]
            count += 1
    if count == 0:
        return -1
    return total / float(count)
def update():
    """Move each centroid to the mean of the items currently assigned to it.

    Works on the module-level ``centroids``, ``items`` and
    ``closest_centroid``. Centroid ``j`` (0-based) owns the items labelled
    ``j + 1``. A centroid with no assigned items keeps its coordinates.
    The centroid lists are modified in place.
    """
    for j, centroid in enumerate(centroids):
        # Test for an empty cluster directly rather than through the -1
        # return value of CalculateMean: a cluster whose real coordinate
        # mean is exactly -1 must still be moved.
        if (j + 1) not in closest_centroid:
            continue
        for i in range(len(centroid)):
            centroid[i] = CalculateMean(items, closest_centroid, i, j)
# Alternate between moving the centroids and reassigning the items until the
# assignment stops changing, then print the final assignment.
# NOTE(review): nothing visible here guarantees that this procedure settles,
# so the number of rounds is capped to rule out an endless loop.
max_iterations = 1000
for _ in range(max_iterations):
    # Despite its name, this holds the previous cluster assignment.
    old_centroids = copy.deepcopy(closest_centroid)
    update()
    # Score the items against the centroids that update() just moved.
    # assignment() reads this table; without refreshing it, every round
    # would reproduce the first assignment and the loop would stop at once.
    Correlations = Correlation(items, centroids)
    assignment()
    if old_centroids == closest_centroid:
        break
print(closest_centroid)