-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRecommendUser.py
176 lines (165 loc) · 8.47 KB
/
RecommendUser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import credentials
import numpy as np
import math
from sklearn.neighbors import NearestNeighbors
from RecommendData import RecommendData
from pprint import pprint
class RecommendUser(object):
    """ Neighbour-based restaurant recommender built on a fitted data object.

    `rec_data` must expose the attributes read by this class:
    rev_matrix, rev_matrix_pca, pca_model, user_id_array, rest_id_array,
    user_total_rev_array, user_total_thanks_array, rest_avg_stars_array,
    rest_total_rev_array, rest_name_array, rest_city_array, rest_geo_array
    and rest_price_array.

    Call fit(like_rest_id_list) and then read the result attributes
    (nearest_users_id_list, unrated_rest_id_list, user_weight_array,
    predicted_stars_list, printable_result).
    """

    def __init__(self, rec_data, k=5, alpha=0.45, beta=10, pseudo_rating=3.0):
        """ Store the model parameters and fit the nearest-neighbours model.

        rec_data      -- data object (see class docstring)
        k             -- number of nearest users to look up
        alpha         -- multiplier of the prior term in the prediction
        beta          -- number of pseudo reviews used for smoothing
        pseudo_rating -- star value given to each pseudo review
        """
        # Model parameters
        self.data = rec_data
        self.k = k
        self.alpha = alpha
        self.beta = beta
        self.pseudo_rating = pseudo_rating
        # Recommendations (filled in by fit())
        self.like_rest_id_list = None
        self.like_rev_vector = None
        self.nearest_users_id_list = None
        self.nearest_users_dist_array = None
        self.nearest_users_common_rest_array = None
        self.unrated_rest_id_list = None
        self.user_weight_array = None
        self.predicted_stars_list = None
        self.printable_result = None
        # Nearest Neighbors model
        self.nn_model = NearestNeighbors(n_neighbors=k,
                                         metric='correlation',
                                         algorithm='brute')
        self.nn_model.fit(self.data.rev_matrix_pca)

    def index_to_id(self, index_list, full_id_list):
        """ Converts list of index (row or column) to list of ids (user_id or rest_id) """
        return [full_id_list[i] for i in index_list]

    def id_to_index(self, id_list, full_id_list):
        """ Converts list of ids (user_id or rest_id) to list of index (row or column).

        full_id_list must be a list; the first matching position is used and
        ValueError is raised for an id that is not present. """
        return [full_id_list.index(x) for x in id_list]

    def get_like_rev_vector(self):
        """ Given list of liked restaurant ids, stores the vector used
        for computing similarity, e.g. [0, 0, 5, 0, ..., 0, 5, 0].

        Liked ids that are not in data.rest_id_array are ignored. """
        rest_list = list(self.data.rest_id_array)
        user_stars = [0] * len(rest_list)
        for rest_id in self.like_rest_id_list:
            try:
                user_stars[rest_list.index(rest_id)] = 5
            except ValueError:
                # Unknown restaurant id: nothing to mark
                continue
        self.like_rev_vector = user_stars

    def get_nearest_users(self):
        """ Compute nearest neighbors and update nearest users
        (nearest_users_id_list and nearest_users_dist_array) """
        # The PCA and kneighbors calls take a 2-D (n_samples, n_features)
        # input, so the single liked-vector is passed as one row.
        like_matrix = np.asarray(self.like_rev_vector).reshape(1, -1)
        like_pca = self.data.pca_model.transform(like_matrix)
        distances, indices = self.nn_model.kneighbors(like_pca)
        self.nearest_users_id_list = self.index_to_id(indices[0],
                                                      self.data.user_id_array)
        self.nearest_users_dist_array = distances[0]

    def get_suggested_rest_list(self):
        """ Create a list of unrated rest_ids basing on
        the reviews of the nearest users.

        Also stores, per nearest user, how many of the liked restaurants
        that user has rated (nearest_users_common_rest_array). """
        rest_id_list = list(self.data.rest_id_array)
        # Liked ids missing from the catalogue are skipped, consistently
        # with get_like_rev_vector(), instead of aborting the whole fit.
        like_rest_ix = set()
        for rest_id in self.like_rest_id_list:
            try:
                like_rest_ix.add(rest_id_list.index(rest_id))
            except ValueError:
                continue
        nearest_users_ix_list = self.id_to_index(self.nearest_users_id_list,
                                                 list(self.data.user_id_array))
        common_counts = []
        suggested_ix = set()
        for user in nearest_users_ix_list:
            # Columns with a non-zero entry are the restaurants this user rated
            rated_ix = np.nonzero(np.array(self.data.rev_matrix[user]))[0].tolist()
            common = 0
            for ix in rated_ix:
                if ix in like_rest_ix:
                    common += 1
                else:
                    suggested_ix.add(ix)
            common_counts.append(common)
        self.nearest_users_common_rest_array = np.array(common_counts,
                                                        dtype=np.int64)
        # Sorted so the output order is deterministic
        self.unrated_rest_id_list = self.index_to_id(sorted(suggested_ix),
                                                     rest_id_list)

    def get_user_weight_array(self):
        """ Create an array of weights for the nearest users.

        weight = distance * sqrt(common restaurants)
                 * log(total reviews + 1) * log(total thanks + 1) """
        nearest_users_ix_list = self.id_to_index(self.nearest_users_id_list,
                                                 list(self.data.user_id_array))
        weights = []
        for n, user_ix in enumerate(nearest_users_ix_list):
            # Look the totals up by the user's own row so that they stay
            # aligned with the n-th distance and the n-th overlap count.
            total_reviews = self.data.user_total_rev_array[user_ix]
            total_thanks = self.data.user_total_thanks_array[user_ix]
            # NOTE(review): the weight grows with the correlation *distance*,
            # i.e. less similar users weigh more - confirm this is intended.
            weight = self.nearest_users_dist_array[n] *\
                math.sqrt(self.nearest_users_common_rest_array[n]) *\
                math.log(total_reviews + 1) *\
                math.log(total_thanks + 1)
            weights.append(float(weight))
        self.user_weight_array = np.array(weights)

    def get_predicted_stars_list(self):
        """ Compute the predicted stars for the unrated restaurants basing
        on the weights array.

        Stores predicted_stars_list (one value per unrated restaurant) and
        printable_result, a list of tuples
        (rest index, predicted stars, stars of the last nearest user who
        rated it, ids of nearest users who rated it, total reviews,
        average stars, distance of that last user, name, city, geo, price).
        Raises ZeroDivisionError if the total weight of a restaurant is zero. """
        unrated_rest_ix_list = self.id_to_index(self.unrated_rest_id_list,
                                                list(self.data.rest_id_array))
        nearest_users_ix_list = self.id_to_index(self.nearest_users_id_list,
                                                 list(self.data.user_id_array))
        predicted_rest_list = []
        printable_result = []
        for rest in unrated_rest_ix_list:
            predicted_stars = 0
            norm = 0
            user_rated_this = []
            # Tracking values; None when no nearest user rated this restaurant
            mystars = None
            dist = None
            # posterior: stars given by nearest users
            for n, user in enumerate(nearest_users_ix_list):
                stars = self.data.rev_matrix[user][rest]
                if stars:
                    predicted_stars += stars * self.user_weight_array[n]
                    norm += self.user_weight_array[n]
                    user_rated_this.append(self.nearest_users_id_list[n])
                    mystars = stars  # tracking purpose only
                    dist = self.nearest_users_dist_array[n]  # tracking purpose only
            # add the prior: avg_stars and smoothing with total restaurant reviews
            avg_stars = float(self.data.rest_avg_stars_array[rest])
            total_reviews = float(self.data.rest_total_rev_array[rest])
            prior_weight = self.alpha * (math.sqrt(total_reviews + self.beta))
            predicted_stars += prior_weight *\
                (avg_stars * total_reviews + self.pseudo_rating * self.beta) /\
                (total_reviews + self.beta)
            norm += prior_weight
            predicted_stars = float(predicted_stars) / norm
            predicted_rest_list.append(predicted_stars)
            printable_result.append((rest, predicted_stars, mystars,
                                     user_rated_this, total_reviews, avg_stars, dist,
                                     self.data.rest_name_array[rest],
                                     self.data.rest_city_array[rest],
                                     self.data.rest_geo_array[rest],
                                     self.data.rest_price_array[rest]))
        self.predicted_stars_list = predicted_rest_list
        self.printable_result = printable_result

    def fit(self, like_rest_id_list):
        """ Compute recommendations given a list of liked rest_ids """
        # Store the list of liked rest_ids
        self.like_rest_id_list = like_rest_id_list
        # Update stars vector for the like_list
        self.get_like_rev_vector()
        # Update nearest users
        self.get_nearest_users()
        # Update restaurants suggested
        self.get_suggested_rest_list()
        # Update nearest user "trustability"
        self.get_user_weight_array()
        # Predict stars for restaurants suggested
        self.get_predicted_stars_list()
if __name__ == "__main__":
    # Demo run: fit the data, ask for recommendations for a fixed list of
    # liked restaurant ids and dump the intermediate and final results.
    # Every print takes a single pre-formatted string so the output is the
    # same under Python 2 and Python 3.
    rec_d = RecommendData()
    rec_d.fit()
    print('Matrix fitted')
    rec_u = RecommendUser(rec_d, k=5)
    rec_u.fit([757, 464, 699, 2352, 3486, 1128])
    print('Nearest user_id: %s' % (rec_u.nearest_users_id_list,))
    print('Unrated rest_id: %s' % (rec_u.unrated_rest_id_list,))
    print('User weights %s' % (rec_u.user_weight_array,))
    print('Commonality %s' % (rec_u.nearest_users_common_rest_array,))
    print('Result: rest, predicted_stars, user_stars, user_rated_this, total_reviews, avg_stars, dist, name, city, geo, price')
    result = rec_u.printable_result
    # Highest predicted stars first (element 1 of each result tuple)
    ranked = sorted(result, key=lambda tup: tup[1], reverse=True)
    for row in ranked:
        print(row)
print "All tests passed."