-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtemporary_test.py
80 lines (60 loc) · 2.28 KB
/
temporary_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pickle
import pandas as pd
from sklearn.externals import joblib
from mongoengine import connect
from models import DealW2v
from models import PosData
# Inclusive month-day bounds of the history window; they are spliced into the
# profile CSV file name below.
HISTORY_FROM = '04-01'
HISTORY_TO = '04-10'

# Register the default mongoengine connection to the local 'wepickw2v' database.
connect('wepickw2v', host='mongodb://localhost')

# The profile file is named after the history window: profile_<from>_<to>.csv.
_history_window = HISTORY_FROM + '_' + HISTORY_TO
profile_data_path = 'profile_' + _history_window + '.csv'

# The first CSV column becomes the DataFrame index.
profile_df = pd.read_csv(profile_data_path, index_col=0)

# Previously persisted scaler object, restored from disk with joblib.
scaler = joblib.load('scaler.pkl')
# Collect the distinct DealIds recorded for the '2018-04-11 21' slot with
# WepickRank >= 20. The $group stage on DealId de-duplicates them.
goal_data = PosData.objects(
    TransDate='2018-04-11 21',
    WepickRank__gte=20,
).aggregate(
    {'$group': {'_id': '$DealId'}},
    allowDiskUse=True,
)
goal_list = [elem['_id'] for elem in goal_data]

# Keep only deals that have a DealW2v document. goal_list_final and goal_vec
# are filled in lockstep, so position k in one corresponds to position k in
# the other.
# NOTE(review): vectors are not length-checked here, unlike the history loop
# further down — confirm every goal deal carries a full-size vector.
goal_list_final = []
goal_vec = []
for deal_id in goal_list:  # renamed from `id`, which shadowed the builtin
    deal = DealW2v.objects(pk=deal_id).first()
    if deal is None:
        continue
    deal_vec = deal.vectorizedWords
    goal_list_final.append(deal_id)
    goal_vec.append(deal_vec)
# Restore the two persisted models used for scoring. `scaler` is already bound
# by the earlier joblib.load('scaler.pkl') call, so the same file is not read
# from disk a second time here.
wplr = joblib.load('wplr.pkl')
gbc = joblib.load('wpgbc.pkl')
# Take a fixed positional slice of the profile table; its index values are
# used as the user ids for the history query and for labelling output rows.
sample_user = profile_df.iloc[8001:8030]
user_ids = sample_user.index.tolist()

# For each sampled user, gather every DealId seen inside the history window
# with 20 <= WepickRank <= 55, grouped per user.
userdata = PosData.objects(
    UserId__in=user_ids,
    TransDate__gte='2018-' + HISTORY_FROM + ' 00',
    TransDate__lte='2018-' + HISTORY_TO + ' 23',
    WepickRank__gte=20,
    WepickRank__lte=55,
).aggregate(
    {'$group': {'_id': '$UserId', 'docs': {'$push': '$DealId'}}},
    allowDiskUse=True,
)

hist_columns = ['hist_{}'.format(i + 1) for i in range(40)]
slot_count = len(hist_columns)

# Build one fixed-width row per user, keyed by the user id reported by the
# aggregation. $group does not guarantee output order and omits users with no
# matching documents, so rows must not be matched to user_ids by position.
history_by_user = {}
for user in userdata:
    kept = []
    for deal_id in user['docs']:
        if len(kept) == slot_count:
            # Row is full; writing further entries would overflow the
            # fixed-width history row.
            break
        deal = DealW2v.objects(pk=deal_id).first()
        if deal is None:
            continue
        deal_vec = deal.vectorizedWords
        if len(deal_vec) != 100:
            continue
        # Keep the deal only when its first three vector components are all
        # non-zero (presumably filters out empty/unset vectors — confirm).
        if deal_vec[0] != 0 and deal_vec[1] != 0 and deal_vec[2] != 0:
            kept.append(deal_id)
    # Pad unused slots with 0.
    history_by_user[user['_id']] = kept + [0] * (slot_count - len(kept))

# Emit rows in user_ids order; users without any history get an all-zero row.
histdata = [history_by_user.get(uid, [0] * slot_count) for uid in user_ids]
hist_df = pd.DataFrame(histdata, index=user_ids, columns=hist_columns)
hist_df.to_csv('random_user_history.csv')
# Score every (sampled user, goal deal) pair: each model input row is the
# user's profile values followed by the deal's vector.
lr_results = []
gbc_results = []
for _, profile in sample_user.iterrows():
    # The profile part is identical for every goal deal, so convert it once
    # per user instead of once per pair.
    profile_features = profile.tolist()
    candidates = [profile_features + vec for vec in goal_vec]
    scaled = scaler.transform(candidates)
    # Column 1 of predict_proba is the probability of the models' second
    # class (presumably the positive class — confirm against training code).
    lr_results.append(wplr.predict_proba(scaled)[:, 1])
    gbc_results.append(gbc.predict_proba(scaled)[:, 1])

# One row per sampled user, one column per goal deal id.
lr_df = pd.DataFrame(lr_results, user_ids, goal_list_final)
gbc_df = pd.DataFrame(gbc_results, user_ids, goal_list_final)
lr_df.to_csv('lr_review_0411_21.csv')
gbc_df.to_csv('gbc_review_0411_21.csv')