-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathanalysis-rest.py
116 lines (93 loc) · 3.79 KB
/
analysis-rest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# -*- coding: utf-8 -*-
import os

import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
from pymongo import MongoClient
class AnalyzeRestaurantItem(object):
    """Flatten scraped restaurant documents from MongoDB into pandas tables.

    Every document of the ``restaurantreviews`` collection is geocoded and
    split into four DataFrames (general info, cuisines, features, meals).
    The DataFrames are stored on the instance, written as CSV files below
    ``output_dir`` and printed.
    """

    db_name = 'restaurantinfo'
    # Application identifier sent to Nominatim; geopy 2.x refuses to build a
    # Nominatim geocoder that does not set a custom user agent.
    geocoder_user_agent = 'restaurantinfo-analysis'
    # Directory that receives the exported CSV files.
    output_dir = 'data'

    info_fields = [
        'restaurant name', 'latitude', 'longitude', 'street', 'city',
        'country', 'rating', 'price', 'review excellent count',
        'review good count', 'review avg count', 'review poor count',
        'review terrible count', 'positive review count',
        'negative review count', 'total reviews',
    ]
    cuisine_fields = ['restaurant name', 'cuisine']
    features_fields = ['restaurant name', 'feature']
    meals_fields = ['restaurant name', 'meal']

    # Populated by load_mongodb_to_pandas().
    info_df = None
    cuisines_df = None
    features_df = None
    meals_df = None

    # Created on first use by convert_addr_to_coord() and then reused.
    _geolocator = None

    def __init__(self, mongo_uri='mongodb://localhost:27017/restaurantinfo'):
        """Connect to MongoDB and select the ``db_name`` database.

        Args:
            mongo_uri: Connection string handed to ``MongoClient``. Defaults
                to the local instance this script was written against.
        """
        self.client = MongoClient(mongo_uri)
        self.db = self.client[self.db_name]

    def convert_addr_to_coord(self, addr):
        """Geocode a postal address with Nominatim.

        Args:
            addr: Address string to look up.

        Returns:
            A ``(latitude, longitude)`` tuple, or ``(0, 0)`` when the
            geocoder returns no match. Errors raised by the geocoder are not
            caught here.
        """
        # One geocoder serves every lookup instead of building a new one
        # per document.
        if self._geolocator is None:
            self._geolocator = Nominatim(user_agent=self.geocoder_user_agent)
        location = self._geolocator.geocode(addr, timeout=3)
        if location:
            return location.latitude, location.longitude
        return 0, 0

    @staticmethod
    def _categorize_price(raw_price):
        """Map the raw price string to AFFORDABLE / MODERATE / EXPENSIVE."""
        # NOTE(review): the category is derived purely from the length of the
        # string once " - " is removed (1 -> AFFORDABLE, 4 -> MODERATE,
        # anything else -> EXPENSIVE). Confirm these thresholds against the
        # actual `rest_price` values stored in the collection.
        price = raw_price.replace(" - ", "")
        if len(price) == 1:
            return "AFFORDABLE"
        if len(price) == 4:
            return "MODERATE"
        return "EXPENSIVE"

    @staticmethod
    def _split_values(raw):
        """Split a comma-separated string into whitespace-trimmed items."""
        if not raw:
            return []
        # strip() removes the newlines the original removed and also the
        # blank that follows each comma, so "a, b" does not yield " b".
        return [value.strip() for value in raw.split(',')]

    def load_mongodb_to_pandas(self):
        """Build the four DataFrames from MongoDB, export and print them.

        Documents are skipped when any of ``rest_cuisines``,
        ``rest_features`` or ``rest_meals`` is ``None``, or when geocoding
        yields a zero latitude or longitude (the "not found" marker of
        ``convert_addr_to_coord``).
        """
        rest_info = []
        rest_cuisine = []
        rest_features = []
        rest_meals = []

        for doc in self.db.restaurantreviews.find():
            # Cheap checks first: incomplete documents must not cost a
            # geocoding request.
            if (doc['rest_cuisines'] is None or
                    doc['rest_features'] is None or
                    doc['rest_meals'] is None):
                continue

            street = doc['rest_street']
            city = doc['rest_city']
            country = doc['rest_country']
            lat, lon = self.convert_addr_to_coord(
                street + ", " + city + ", " + country)
            if lat == 0 or lon == 0:
                continue

            positive_review_count = (int(doc['review_excellent_count']) +
                                     int(doc['review_good_count']))
            negative_review_count = (int(doc['review_avg_count']) +
                                     int(doc['review_poor_count']) +
                                     int(doc['review_terrible_count']))
            rest_price = self._categorize_price(doc['rest_price'])
            name = doc['rest_name']

            # Restaurant details and review counters.
            rest_info.append([
                name, float(lat), float(lon), street, city, country,
                doc['rest_rating'], rest_price,
                doc['review_excellent_count'], doc['review_good_count'],
                doc['review_avg_count'], doc['review_poor_count'],
                doc['review_terrible_count'],
                positive_review_count, negative_review_count,
                doc['rest_total_reviews'],
            ])

            # One row per (restaurant, value) pair for each list-like field.
            rest_cuisine.extend(
                [name, cuisine]
                for cuisine in self._split_values(doc['rest_cuisines']))
            rest_features.extend(
                [name, feature]
                for feature in self._split_values(doc['rest_features']))
            rest_meals.extend(
                [name, meal]
                for meal in self._split_values(doc['rest_meals']))

        self.info_df = pd.DataFrame(rest_info, columns=self.info_fields)
        self.cuisines_df = pd.DataFrame(rest_cuisine, columns=self.cuisine_fields)
        self.features_df = pd.DataFrame(rest_features, columns=self.features_fields)
        self.meals_df = pd.DataFrame(rest_meals, columns=self.meals_fields)

        # to_csv does not create missing directories; make sure the target
        # exists so the export cannot fail after all the geocoding work.
        if not os.path.isdir(self.output_dir):
            os.makedirs(self.output_dir)

        exports = [
            (self.info_df, 'restaurant_info.csv'),
            (self.cuisines_df, 'cuisines_info.csv'),
            (self.features_df, 'features_info.csv'),
            (self.meals_df, 'meals_info.csv'),
        ]
        for frame, filename in exports:
            frame.to_csv(os.path.join(self.output_dir, filename), index=False)

        for frame, _ in exports:
            print(frame)
# Script entry point: build the tables and export them when run directly.
if __name__ == '__main__':
    AnalyzeRestaurantItem().load_mongodb_to_pandas()