# twitter_christmas_scraper.py
# Forked from bigsnarfdude/machineLearning.
import tweepy
import tweetstream
# Query the Twitter stream for tweets from specific people and dump each raw
# tweet object to stdout.
# NOTE(review): `username`, `password` and `people` are not assigned anywhere
# above this snippet; they must be defined before it can run.
with tweetstream.FilterStream(username, password, follow=people) as stream:
    for tweet in stream:
        print(tweet)
# Fetch the friends of "some_twitter_user" once and reuse the result, rather
# than issuing the same API request three times.
friends = list(tweepy.api.friends("some_twitter_user"))

# Screen name of each friend.
for friend in friends:
    print(friend.screen_name)

# ID number of each friend.
for friend in friends:
    print(friend.id)

# Collect the IDs into a list.
friend_list = [friend.id for friend in friends]
# Print the name and location of every follower whose location is set.
# `is not None` replaces isinstance(..., types.NoneType): the `types` module
# was never imported, so the original check raised NameError.
for follower in tweepy.api.followers("some_twitter_user"):
    if follower.location is not None:
        print("%s %r" % (follower.name, follower.location))
# Print the text of every streamed tweet that matches the tracked homophobic
# terms.
with tweetstream.FilterStream(username, password, track=["faggot","homo", "fag"]) as stream:
    for tweet in stream:
        print(tweet['text'])
# Print each matching tweet followed by a running counter.
# NOTE: this is a running total since the stream was opened, not a per-minute
# rate; a rate would need timestamps.
count = 0  # must exist before the first print; it was previously undefined
with tweetstream.FilterStream(username, password, track=["faggot","homo", "fag"]) as stream:
    for tweet in stream:
        print(tweet['text'])
        print("%s %s" % (20 * "*", count))
        count += 1
import codecs

# Append the text of every tweet matching "didn't get" to the file
# 'tweet_christmas3', using '|' as the separator between tweets.
count = 0  # running number of tweets seen by this snippet

# The file is opened once for the whole stream instead of once per tweet.
# Mode 'a' (not 'at'): codecs.open appends 'b' when an encoding is given, and
# the resulting 'atb' is rejected by Python 3.
with codecs.open('tweet_christmas3', mode='a', encoding='utf-8') as f:
    with tweetstream.FilterStream(username, password, track=["didn't get"]) as stream:
        for tweet in stream:
            print("%s %s" % (20 * "*", count))
            count += 1
            print(tweet['text'])
            # Punctuation stripping, left disabled by the original author:
            #document = re.sub('[%s]' % re.escape(string.punctuation), '', tweet['text'])
            #print document
            f.write(tweet['text'] + '|')
            # Flush after every tweet so an interrupted run keeps what it
            # has collected so far.
            f.flush()
# Standalone variant of the stream readers above: instead of printing, every
# matching tweet is stored in the `tweets` collection of the local `election`
# MongoDB database.
import pymongo
import tweetstream

# MongoClient replaces pymongo.Connection, which was removed in PyMongo 3.0.
connection = pymongo.MongoClient("localhost", 27017)
db = connection.election

# Placeholder credentials -- never commit real ones.
username = "hahaha_nice_try"
password = "************"
words = ["iPhone", "iPad", "MacBook"]

with tweetstream.FilterStream(username, password, track=words) as stream:
    for tweet in stream:
        # NOTE(review): Collection.save is deprecated in PyMongo 3 and removed
        # in PyMongo 4; switch to insert_one when upgrading the driver.
        db.tweets.save(tweet)