-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcentroid_insert.py
55 lines (44 loc) · 1.66 KB
/
centroid_insert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
from pymongo import MongoClient
# yourdate = dateutil.parser.parse(datestring)
# MongoDB handles: a client built with default connection settings, the
# `twitter` database, and the two collections used by this script.
client = MongoClient()
db = client["twitter"]
users = db["users"]
community = db["community"]

# Empty filter document handed to find() at the bottom of the file.
query = dict()

# Exclusion projection: every field listed here is mapped to 0 so it is
# omitted from the documents returned by users.find().
# NOTE(review): 'tdidf' may be a typo for 'tfidf' -- confirm against the
# stored documents before changing it.
projection = dict.fromkeys(
    (
        '_id',
        'user.name',
        'user.screen_name',
        'user.url',
        'user.description',
        'date_last_tweet',
        'user.lang',
        'tdidf',
        'user.created_at',
    ),
    0,
)
def boolkeystoint(doc):
    """Convert boolean values in *doc* to integers, in place.

    Top-level ``bool`` values become ``0``/``1``. For every top-level value
    that is itself a dict, the ``bool`` values one level down are converted
    as well. Deeper nesting and values inside lists are left untouched.

    Args:
        doc: A dict (or dict subclass) to normalise. It is mutated.

    Returns:
        None. The conversion happens in place.
    """
    for key, value in doc.items():
        # Re-assigning an existing key does not change the dict's size, so
        # it is safe to do while iterating over items().
        if isinstance(value, bool):
            doc[key] = int(value)
        elif isinstance(value, dict):
            # isinstance (rather than comparing against type(doc)) keeps
            # this working when doc is a dict subclass holding plain dicts.
            for inner_key, inner_value in value.items():
                if isinstance(inner_value, bool):
                    value[inner_key] = int(inner_value)
def _flatten_features(doc):
    """Flatten one document into a flat list of its values.

    List values are skipped. Dict values contribute each of their own
    values except the one stored under the key ``'id'``. Every other value
    is appended as-is.
    """
    features = []
    for value in doc.values():
        if isinstance(value, list):
            continue
        if isinstance(value, dict):
            # Read from the sub-document being iterated instead of
            # hard-coding doc['user'], so any embedded document works.
            features.extend(
                item for name, item in value.items() if name != 'id'
            )
        else:
            features.append(value)
    return features


def find(query):
    """Compute and store a centroid per matching user plus their mean.

    For every document returned by ``users.find(query, projection)``:
    booleans are converted to ints, the document is flattened into a list
    (the "centroid"), the list is printed and written back to the matching
    user document under the ``centroid`` field. Afterwards the element-wise
    mean of all centroids is printed and inserted into ``community``.

    Args:
        query: Filter document passed to ``users.find``.

    Returns:
        None. Results are printed and written to the database.
    """
    try:
        cursor = users.find(query, projection)
    except Exception as e:
        print("Unexpected error:", type(e), e)
        # Without a cursor there is nothing to iterate; falling through
        # would raise NameError on the loop below.
        return

    centroidlist = []
    for doc in cursor:
        boolkeystoint(doc)
        centroid = _flatten_features(doc)
        print(centroid)
        centroidlist.append(centroid)
        users.update_one(
            {'user.id': doc['user']['id']},
            {'$set': {'centroid': centroid}},
        )

    if not centroidlist:
        # The mean of an empty array is NaN; do not store a meaningless
        # community centroid when no document matched.
        print("No documents matched; community centroid not inserted.")
        return

    # axis=0 averages column-wise across all per-user centroids.
    meancentroid = np.mean(np.array(centroidlist), axis=0)
    print(meancentroid)
    community.insert_one({'centroid': meancentroid.tolist()})
# Script entry point: runs at import/execution time using the module-level
# `query` filter (defined above as an empty dict).
find(query)