-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwitterbot-SmartNews.py
executable file
·157 lines (147 loc) · 6.38 KB
/
twitterbot-SmartNews.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# -*- coding: utf-8 -*-
# pip install python-twitter
import twitter
import re
from time import sleep
import random
import urllib2
import urllib
import sys
import datetime
import os
# Proxy users only: uncomment and adjust the lines below.
#proxy = 'http://192.168.0.46:8118'
#sproxy = 'https://192.168.0.46:8118'
# Export the proxy through the environment so the HTTP libraries pick it up
#os.environ['http_proxy'] = proxy
#os.environ['HTTP_PROXY'] = proxy
#os.environ['https_proxy'] = sproxy
#os.environ['HTTPS_PROXY'] = sproxy
# Twitter API credentials, read from the process environment at start-up.
# A missing variable raises KeyError right here, before any network call.
(consumerKey,
 consumerSecret,
 accessTokenKey,
 accessTokenSecret) = (
    os.environ[variable]
    for variable in ('consumer_key',
                     'consumer_secret',
                     'access_token_key',
                     'access_token_secret')
)
def findTitle(url, timeout=30):
    """Return the text of the first ``<title>`` element of the page at *url*.

    The page is fetched with a single request: ``urlopen`` follows redirects
    on its own, so no separate request is needed to resolve the final URL,
    and the browser User-Agent is sent from the very first hop.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The title text exactly as it appears in the markup (not stripped,
        not decoded).

    Raises:
        IndexError: The page contains no ``<title>...</title>`` element.
        Any error raised by ``urlopen`` when the fetch fails or times out.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:40.0) Gecko/20100101 Firefox/40.0'}
    response = urllib2.urlopen(urllib2.Request(url, None, headers), timeout=timeout)
    try:
        webpage = response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()
    # Tolerate upper-case tags and attributes on the tag (<TITLE lang="en">).
    found = re.search(r'<title\b[^>]*>(.*?)</title>', str(webpage),
                      re.IGNORECASE | re.DOTALL)
    if found is None:
        # IndexError is what callers have always seen for a title-less page.
        raise IndexError('no <title> element found in %s' % url)
    return found.group(1)
def findcontent(url, timeout=30):
    """Fetch the page at *url* and return its raw body together with its title.

    The page is fetched with a single request: ``urlopen`` follows redirects
    on its own, so no separate request is needed to resolve the final URL,
    and the browser User-Agent is sent from the very first hop.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        A two-item list ``[body, title]``: the response body as read from
        the socket, and the text of the first ``<title>`` element.

    Raises:
        IndexError: The page contains no ``<title>...</title>`` element.
        Any error raised by ``urlopen`` when the fetch fails or times out.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:40.0) Gecko/20100101 Firefox/40.0'}
    response = urllib2.urlopen(urllib2.Request(url, None, headers), timeout=timeout)
    try:
        webpage = response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()
    # Tolerate upper-case tags and attributes on the tag (<TITLE lang="en">).
    found = re.search(r'<title\b[^>]*>(.*?)</title>', str(webpage),
                      re.IGNORECASE | re.DOTALL)
    if found is None:
        # IndexError is what callers have always seen for a title-less page.
        raise IndexError('no <title> element found in %s' % url)
    return [webpage, found.group(1)]
def findSource(url, timeout=30):
    """Return the raw body of the page at *url*.

    The page is fetched with a single request: ``urlopen`` follows redirects
    on its own, so no separate request is needed to resolve the final URL,
    and the browser User-Agent is sent from the very first hop.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response body as read from the socket (not decoded).

    Raises:
        Any error raised by ``urlopen`` when the fetch fails or times out.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:40.0) Gecko/20100101 Firefox/40.0'}
    response = urllib2.urlopen(urllib2.Request(url, None, headers), timeout=timeout)
    try:
        return response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()
# Twitter list whose members' timelines are scanned for links.
_LIST_ID = '904980544005574656'

# Keywords searched for in each linked page.  Spaces inside an entry are part
# of the needle for the title/URL checks and are removed before the keyword
# is compared against the page body or used as a hashtag.
_HASHTAGS = ["robot", "cloud", "azure", "aws", "apple", "tesla", "uber",
             " facebook ", "linux", "fintech", "lifehacking", "google ",
             "docker", "devops", "bigdata", "datascience", "bitcoin", "IOT ",
             "AI ", "hack", "hacking", "lifestyle"]

# Extracts http(s) URLs from tweet text; compiled once instead of per tweet.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

# Seconds to wait before retrying after a failed read from the Twitter API.
_RETRY_DELAY = 100

# Twitter error code -> (log label, seconds to wait before carrying on).
_POST_ERROR_WAITS = {
    88: ("Rate limit", 100),
    185: ("daily update limit", 1800),
    89: ("Check Token", 200),
    326: ("Account locked spam suspecion", 200),
}


def _twitter_error_code(error):
    """Return the numeric code carried by a TwitterError, or None if absent."""
    details = error.args[0] if error.args else None
    # The payload compared against by the original code is a list holding
    # one {'message': ..., 'code': ...} dict; accept a bare dict as well.
    if isinstance(details, (list, tuple)) and details:
        details = details[0]
    if isinstance(details, dict):
        return details.get('code')
    return None


def _matching_hashtags(title, url, source):
    """Return the keywords (spaces removed) found in the title, URL or body."""
    # Decode and lower-case once per page rather than once per keyword.
    # "replace" keeps a single bad byte from disabling matching altogether.
    title_text = title.decode("utf8", "replace").lower()
    title_compact = title_text.replace(" ", "")
    url_text = url.decode("utf8", "replace").lower()
    source_text = source.decode("utf8", "replace").lower()
    matches = []
    for keyword in _HASHTAGS:
        needle = keyword.strip('#').lower()
        # NOTE(review): the body check is a plain substring test on the raw
        # page source, so short keywords such as "ai" presumably match most
        # pages -- confirm whether that is intended.
        if (needle in title_text
                or needle in title_compact
                or needle in url_text
                or needle.replace(" ", "") in source_text):
            matches.append(keyword.replace(" ", ""))
            print(" + " + keyword)
        else:
            print(" " + keyword)
    return matches


def _handle_post_error(error):
    """Log a failed status update and wait when the error code calls for it."""
    code = _twitter_error_code(error)
    if code == 327:
        # Already retweeted: nothing to do.
        print(error)
    elif code == 187:
        print("Duplicated")
    elif code in _POST_ERROR_WAITS:
        label, wait = _POST_ERROR_WAITS[code]
        print(label)
        print(error)
        # The announced wait is always the one actually slept.
        print("Waiting %d sec" % wait)
        sleep(wait)
    else:
        print(error)


def _post_link(api, tags, url):
    """Tweet *url* with *tags*, then pause before the next post."""
    message = "#" + " #".join(tags) + " #fintech " + url + " "
    print("To be sent :--" + message + "---")
    try:
        status = api.PostUpdate(message)
    except twitter.error.TwitterError as error:
        _handle_post_error(error)
        return
    print(status.text)
    print(str(datetime.datetime.now()) + ' =============-> OK sent ' + message)
    # Random gap between posts.
    pause = random.randint(700, 1111)
    print('sleep waiting for another shot ' + str(pause))
    sleep(pause)


def _process_member(api, screen_name):
    """Scan one member's recent tweets and post every link that matches."""
    try:
        # The timeline is requested by screen name directly, which saves the
        # extra GetUser call that was only used to look up the user id.
        statuses = api.GetUserTimeline(screen_name=screen_name, count=50)
    except twitter.error.TwitterError as error:
        # A failed read must not take the whole bot down.
        print(error)
        print("Waiting %d sec" % _RETRY_DELAY)
        sleep(_RETRY_DELAY)
        return
    print(screen_name)
    for status in statuses:
        urls = _URL_PATTERN.findall(status.text or "")
        if not urls:
            continue
        url = str(urls[0])
        try:
            source, title = findcontent(url)
        except Exception as error:
            # Unreachable or title-less page: skip the link.  Unlike a bare
            # "except:", this lets Ctrl-C and SystemExit stop the bot.
            print("skipping " + url + ": " + str(error))
            continue
        print("--+" + url)
        matches = _matching_hashtags(title, url, source)
        print(matches)
        if matches:
            _post_link(api, matches, url)
        else:
            print("no match found")


def main():
    """Run the bot forever: walk the list members and tweet matching links."""
    api = twitter.Api(consumer_key=consumerKey,
                      consumer_secret=consumerSecret,
                      access_token_key=accessTokenKey,
                      access_token_secret=accessTokenSecret)
    while True:
        try:
            members = api.GetListMembers(list_id=_LIST_ID)
        except twitter.error.TwitterError as error:
            print(error)
            print("Waiting %d sec" % _RETRY_DELAY)
            sleep(_RETRY_DELAY)
            continue
        for member in members:
            _process_member(api, str(member.screen_name))


if __name__ == '__main__':
    main()