# fetch_reviews.py
#Import required libraries
import os

from google_play_scraper import app, Sort, reviews_all, search, reviews
import pandas as pd
# Look up Play Store apps matching the keyword.
# lang defaults to 'en', country to 'us', and n_hits to 30 (Google's maximum).
keyword = "Scientific Calculator"
result = search(keyword, lang="en", country="in", n_hits=10)

# Keep the search hits in tabular form as well.
search_results = pd.DataFrame(result)
# Fetch the detailed store listing for every search hit, one request per app.
# The raw search hits are iterated directly: only the 'appId' field is needed,
# so there is no reason to walk the DataFrame row by row.
all_app_data = [
    app(
        hit['appId'],
        lang='en',     # defaults to 'en'
        country='in',  # defaults to 'us'
    )
    for hit in result
]

# Convert the list of listing dictionaries to a pandas DataFrame.
data = pd.DataFrame(all_app_data)
# Drop the columns that are not wanted in the exported app summary.
# errors="ignore" skips any listed column that is absent from `data`
# (for example when no apps were found, or when the scraper does not return
# a given field) instead of aborting the whole run with a KeyError.
unwanted_columns = [
    'installs', 'minInstalls', 'free', 'currency', 'sale', 'saleTime',
    'originalPrice', 'saleText', 'offersIAP', 'inAppProductPrice',
    'developer', 'developerId', 'developerEmail', 'developerWebsite',
    'developerAddress', 'privacyPolicy', 'genre', 'genreId', 'categories',
    'headerImage', 'screenshots', 'video', 'videoImage', 'contentRating',
    'contentRatingDescription', 'adSupported', 'updated', 'version',
    'comments',
]
data.drop(unwanted_columns, axis=1, inplace=True, errors="ignore")

# Save the app information to a CSV file named after the search keyword.
data.to_csv("Top10_" + keyword + "_Apps.csv")
print("Fetched top 10 " + keyword + " apps")
# Fetch the latest reviews (count=5000 requested per app) for each app found
# by the search and save them to reviews/<appId>.csv, one file per app.
# The output directory is created up front because to_csv() does not create
# missing parent folders.
os.makedirs("reviews", exist_ok=True)
for hit in result:
    app_id = hit['appId']
    print("Fetching reviews for " + app_id)
    # reviews() returns a pair; the second element is not used here because
    # only a single batch is requested per app.
    g_reviews, _ = reviews(
        app_id,
        lang='en',         # defaults to 'en'
        country='in',      # defaults to 'us'
        sort=Sort.NEWEST,  # defaults to Sort.MOST_RELEVANT
        count=5000,
    )
    pd.DataFrame(g_reviews).to_csv(os.path.join("reviews", str(app_id) + ".csv"))