forked from ShriyaShankar/mini-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreaming.py
82 lines (66 loc) · 2.33 KB
/
streaming.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
import numpy as np
from tweepy import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
import pandas as pd
import preprocessor as preprocess
from framework import civic_framework
import time
import os

# Twitter API access credentials.
# SECURITY: secrets should not live in source control. Each value is read from
# the named environment variable when it is set; the literals are kept only as
# backward-compatible fallbacks and should be revoked and then removed.
access_token = os.environ.get(
    'TWITTER_ACCESS_TOKEN',
    '995926266032242688-Nm0o3MJLqDywYV08HfIeKkIWO9ryrLA',
)
access_token_secret = os.environ.get(
    'TWITTER_ACCESS_TOKEN_SECRET',
    'o5qrF3MdtXDRmKX1oPk8mISZyXYkUppJlNf9noWaoCaSm',
)
consumer_key = os.environ.get(
    'TWITTER_CONSUMER_KEY',
    'AO2Gys8oVrF8NP7jr081XUnIO',
)
consumer_key_secret = os.environ.get(
    'TWITTER_CONSUMER_KEY_SECRET',
    '1HDS8CNsRE9iaaZLb1Ye9jjTL4Pc10EDgvbF7KJxxWECkjjhvh',
)

# Decoded stream messages, appended by get_tweet() and read by convert_to_df().
tweets_data = []
def get_tweet(line):
    """Decode one raw stream message and append it to ``tweets_data``.

    Args:
        line: JSON text of a single message received from the stream.

    A message that cannot be decoded is reported on stdout and skipped;
    nothing is appended for it.
    """
    try:
        tweet = json.loads(line)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError (malformed JSON); TypeError
        # covers payloads that are not str/bytes. Other exceptions, such as
        # KeyboardInterrupt, propagate instead of being silently swallowed.
        print("Unable to convert")
    else:
        tweets_data.append(tweet)
class StdOutListener(StreamListener):
    """Stream listener that collects messages until ``n_tweets`` have arrived.

    Relies on the module-level names ``tweet_count``, ``n_tweets`` and
    ``stream``, which the ``__main__`` section assigns before streaming starts.
    """

    def on_data(self, data):
        """Store one raw message, or stop the stream once the quota is met.

        Args:
            data: Raw message text handed over by the stream.

        Returns:
            True while more messages are wanted; False after disconnecting,
            so the stream is told explicitly to stop.
        """
        # Only tweet_count is rebound here; n_tweets and stream are read-only.
        global tweet_count
        if tweet_count < n_tweets:
            get_tweet(data)
            tweet_count += 1
            return True
        stream.disconnect()
        return False

    def on_error(self, status):
        """Print the error status reported by the stream.

        Args:
            status: Status code passed in by the stream on an error response.

        Returns:
            False for status 420 (rate limited), which disconnects instead of
            reconnecting and lengthening the rate-limit back-off; None for
            any other status, as before.
        """
        print(status)
        if status == 420:
            return False
        return None
def clean_tweets(text):
    """Return ``text`` after running it through ``preprocess.clean``.

    Args:
        text: Tweet text to clean.

    Returns:
        Whatever ``preprocess.clean`` returns for ``text``.
    """
    return preprocess.clean(text)
def convert_to_df():
    """Build a DataFrame from the messages collected in ``tweets_data``.

    Entries that are not dicts, or that lack any of the ``text``, ``user`` or
    ``created_at`` keys, are skipped rather than raising ``KeyError``.

    Returns:
        pandas.DataFrame with the columns ``description`` (tweet text),
        ``username`` (the user's ``screen_name``) and ``timestamp`` (the
        ``created_at`` value), one row per usable tweet in arrival order.
    """
    required_keys = ('text', 'user', 'created_at')
    usable = [
        tweet for tweet in tweets_data
        if isinstance(tweet, dict) and all(key in tweet for key in required_keys)
    ]

    tweets = pd.DataFrame()
    tweets['description'] = [tweet['text'] for tweet in usable]
    tweets['username'] = [tweet['user']['screen_name'] for tweet in usable]
    tweets['timestamp'] = [tweet['created_at'] for tweet in usable]
    return tweets
# tweets.head()
# stream(0,10)
if __name__ == "__main__":
    # Wall-clock timer covering the whole run, including the user prompt.
    start_time = time.time()

    # Module-level state read by StdOutListener.on_data.
    tweet_count = 0
    n_tweets = int(input("Enter number of tweets to scrape: "))

    listener = StdOutListener()
    oauth = OAuthHandler(consumer_key, consumer_key_secret)
    oauth.set_access_token(access_token, access_token_secret)
    stream = Stream(oauth, listener)

    # Bounding box handed to the stream filter.
    # NOTE(review): presumably west/south/east/north longitude-latitude
    # values; confirm against the streaming API documentation.
    bounding_box = [77.148514, 12.733452, 78.015747, 13.141672]
    stream.filter(locations=bounding_box)

    # Convert what was collected, clean the text column, run the framework.
    tweets = convert_to_df()
    tweets['description'] = tweets['description'].apply(clean_tweets)
    civic_framework(tweets)

    time_taken = time.time() - start_time
    print("\n\n---------------------------------------------------------------------")
    print("Time Taken: {0:.2f} seconds".format(time_taken))