This repository has been archived by the owner on Sep 8, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbot.py
333 lines (292 loc) · 13.2 KB
/
bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#!/usr/bin/env python3
import discord
import json
import logging
import os
import pathlib
import requests
import sys
import time
import urllib
from dotenv import load_dotenv
from urlextract import URLExtract
# Base URL of the Internet Archive; the '/save/' and '/wayback/available'
# endpoints used below are built by appending to this value.
archive_api = 'https://web.archive.org'
# https://www.haykranen.nl/2016/02/13/handling-complex-nested-dicts-in-python/
class DictQuery(dict):
def get(self, path, default = None):
keys = path.split("/")
val = None
for key in keys:
if val:
if isinstance(val, list):
val = [ v.get(key, default) if v else None for v in val]
else:
val = val.get(key, default)
else:
val = dict.get(self, key, default)
if not val:
break;
return val
class BotState:
    '''
    Holds the bot's runtime state: the parsed configuration (``config``) and
    the list of message IDs that have already been answered
    (``handled_messages``).
    '''

    def __init__(self):
        self.load_config()

    def load_config(self):
        '''
        Initializes the bot state by reading the configuration.

        The configuration is taken from the JSON held in the $CONFIG
        environment variable (after loading a .env file). If that cannot be
        parsed, config.json next to this source file is read instead. The
        process exits with status 1 when neither source can be loaded.
        '''
        # A temporary console handler is used here because the module-level
        # `logger` is only configured after the state has been built, so it
        # must not be referenced from this method.
        state_logger = logging.getLogger('bot')
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        ch = logging.StreamHandler()
        ch.setFormatter(formatter)
        state_logger.addHandler(ch)

        load_dotenv()
        try:
            # json.loads(None) raises TypeError when $CONFIG is unset, which
            # is what routes us to the file fallback below.
            self.config = json.loads(os.environ.get('CONFIG'))
        except Exception as e:
            state_logger.error(f'$CONFIG environment variable could not be read (exception was {e}), trying to load from config.json')
            self.current_dir = str(pathlib.Path(__file__).resolve().parent)
            config_file = f'{self.current_dir}/config.json'
            try:
                with open(config_file, 'r') as read_file:
                    try:
                        self.config = json.load(read_file)
                    except Exception as e:
                        state_logger.error(f'Unable to read config file at {config_file}, {e}')
                        sys.exit(1)
            except Exception as e:
                state_logger.warning(f'Config file not found at {config_file}, exiting')
                sys.exit(1)

        state_logger.removeHandler(ch)

        # This object keeps track of handled messages.
        self.handled_messages = []
async def send_dm(user=None, text=None, embed=None):
    '''
    Sends ``user`` a direct message.

    A truthy ``embed`` takes precedence over ``text``. Returns whatever
    ``user.send`` returns, or None when nothing was sent (no user, or
    neither text nor embed supplied); those cases are logged as errors.
    '''
    if not user:
        logger.error(f'send_dm called without user', extra={'guild': 'internal'})
        return None

    logger.debug(msg=f'Sending a message to {user.name}', extra={'guild': 'internal'})
    if embed:
        return await user.send(embed=embed)
    if text:
        return await user.send(text)

    logger.error(f'send_dm called without text or embed', extra={'guild': 'internal'})
    return None
async def send_to_channel(channel=None, message=None, text=None, embed=None):
    '''
    Posts to ``channel`` as a reply that references ``message``, without
    mentioning its author.

    A truthy ``embed`` takes precedence over ``text``. Returns whatever
    ``channel.send`` returns, or None when nothing was sent (no channel, or
    neither text nor embed supplied); those cases are logged as errors.
    '''
    if not channel:
        logger.error(f'send_to_channel called without channel', extra={'guild': 'internal'})
        return None

    logger.debug(msg=f'Sending a message to the channel {channel.name}', extra={'guild': f'{channel.guild.id} | {channel.guild.name}'})
    if embed:
        return await channel.send(embed=embed, reference=message, mention_author=False)
    if text:
        return await channel.send(text, reference=message, mention_author=False)

    logger.error(f'send_to_channel called without text or embed', extra={'guild': 'internal'})
    return None
async def respond_to_user(message=None, user=None, text=None, embed=None, repeat_react=None):
    '''
    Either sends a DM or replies to a channel depending on config.

    When the configured ``messageTarget`` is 'user', or the message has no
    channel, ``user`` receives a DM. Otherwise the reply is posted to the
    message's channel, at most once per message ID unless ``repeat_react``
    is set. ``embed`` is used when it is not None, ``text`` otherwise.
    '''
    target = bot_state.config['messageTarget']
    guild_extra = {'guild': f'{message.guild.id} | {message.guild.name}'}
    logger.info(f'Sending text {text}, target was {target}, message channel was {message.channel}', extra=guild_extra)

    if target == 'user' or message.channel is None:
        # Pass by keyword: the second positional parameter of send_dm is
        # `text`, so a positional embed would be sent as plain text.
        if embed is not None:
            await send_dm(user, embed=embed)
        else:
            await send_dm(user, text=text)
        return

    if message.id in bot_state.handled_messages and not repeat_react:
        logger.info(f'Message with ID {message.id} has already been responded to and repeat react not used', extra=guild_extra)
        return

    # Same keyword rule as above: the third positional parameter of
    # send_to_channel is `text`.
    if embed is not None:
        await send_to_channel(message.channel, message, embed=embed)
    else:
        await send_to_channel(message.channel, message, text=text)
    bot_state.handled_messages.append(message.id)
def save_page(url):
    '''
    Requests that ``url`` be archived by issuing an HTTP GET against the
    archive's /save/ endpoint. Redirects are not followed, so the raw
    response object is handed back to the caller.
    '''
    internal = {'guild': 'internal'}
    save_endpoint = ''.join((archive_api, '/save/', url))

    logger.debug(f'Saving page {url}', extra=internal)
    result = requests.get(save_endpoint, allow_redirects=False)
    logger.debug(f'{url} saved', extra=internal)
    return result
async def handle_archive_react(extractor, message, user):
    '''
    Looks up every link in the reacted-to message on the Wayback Machine.

    For each URL an existing snapshot link is sent via respond_to_user; when
    no snapshot exists a save is requested and the outcome is passed to
    handle_page_save_request. A failed save request ends processing of the
    message.
    '''
    guild_extra = {'guild': f'{message.guild.id} | {message.guild.name}'}
    logger.info(msg=f'Handling archive react on message {str(message.id)} in channel {str(message.channel.id)}, link for context: https://discord.com/channels/{str(message.guild.id)}/{str(message.channel.id)}/{str(message.id)}', extra=guild_extra)

    found_urls = extractor.find_urls(message.content)
    if not found_urls:
        return

    for url in found_urls:
        logger.debug(msg=f'URL found: {url}', extra=guild_extra)
        availability_url = archive_api + '/wayback/available?url=' + urllib.parse.quote(url)
        wayback_response = requests.get(availability_url).json()
        logger.debug(msg=f'Wayback response: {str(wayback_response)}', extra=guild_extra)

        snapshot_url = DictQuery(wayback_response).get('archived_snapshots/closest/url')
        if snapshot_url:
            await respond_to_user(message, user, snapshot_url)
            continue

        logger.info(msg=f'Wayback did not have the URL {url}, requesting that it be archived', extra=guild_extra)
        try:
            response = save_page(url)
        except Exception as e:
            logger.error(msg=f'There was a problem making the request, exception: {e}', extra=guild_extra)
            return
        await handle_page_save_request(message, user, url, response, False)
async def handle_repeat_react(extractor, message, user):
    '''
    Rearchives every link in the message and sends the user who reacted a
    link to the new archive page.

    Failures for one URL are logged and do not stop the remaining URLs from
    being processed.
    '''
    guild_extra = {'guild': f'{message.guild.id} | {message.guild.name}'}
    logger.info(f'Handling repeat react on message {message.id}', extra=guild_extra)

    for url in extractor.find_urls(message.content) or []:
        try:
            response = save_page(url)
        except Exception as e:
            logger.error(f'Error saving page {url}: {e}', extra=guild_extra)
            # Without a response there is nothing to report for this URL;
            # falling through would use an unbound name or the previous
            # URL's response.
            continue

        try:
            await handle_page_save_request(message, user, url, response, True)
        except Exception as e:
            logger.error(f'Error handling page save request: {e}', extra=guild_extra)
async def handle_page_save_request(message, user, url, response, repeat_react):
    '''
    Reports the outcome of a page save request.

    A 301/302 response carries the new archive URL in its Location header,
    which is forwarded through respond_to_user. A 520/523 response is
    reported to the user by DM as a declined crawl. Any other status is
    only logged.
    '''
    guild_extra = {'guild': f'{message.guild.id} | {message.guild.name}'}

    if response.status_code in (301, 302):
        try:
            # Some responses carry a lowercase header name, so try both
            # spellings; a missing header raises KeyError and is reported
            # below. (requests' own header mapping is case-insensitive, so
            # the fallback only matters for plain mappings.)
            headers = response.headers
            wayback_url = headers.get('Location') or headers['location']
            # Called exactly once: a failure while sending must not trigger
            # a second send attempt.
            await respond_to_user(message, user, text=wayback_url, repeat_react=repeat_react)
        except Exception:
            logger.error(msg=f'Unable to extract location from response and send DM. Message ID: {str(message.id)}, URL: {url}', extra=guild_extra, exc_info=True)
            logger.error(msg=f'Response content: \n' + str(response.content), extra=guild_extra)
            logger.error(msg=f'Headers: \n' + str(response.headers), extra=guild_extra)
            logger.error(msg=f'Status Code: \n' + str(response.status_code), extra=guild_extra)
        return

    if response.status_code in (520, 523):
        logger.debug(msg=f'Wayback did not proxy the request for url {url}', extra=guild_extra)
        await send_dm(user, 'The Internet Archive declined to crawl the link you reacted to. Sorry.')
        return

    logger.error(msg=f'Something\'s wrong, we tried to save the page but we were not redirected. Message ID: {message.id}, URL: {url}', extra=guild_extra)
    logger.debug(msg=f'Status code: {response.status_code}', extra=guild_extra)
    logger.debug(msg=f'{response.content}', extra=guild_extra)
async def status_command(bot_state, client, message):
    '''
    Handles the status command: DMs the caller an embed listing the guilds
    the client is in plus counts of cached messages, private channels and
    handled messages. Callers whose ID is not in the configured
    ``administratorIds`` are ignored (logged at debug level).
    '''
    admin_ids = bot_state.config['administratorIds']

    # only administrators can use this command
    if message.author.id not in admin_ids:
        logger.debug(f'Status command called but {message.author.id} is not in administratorIds', extra={'guild': 'internal'})
        return

    guild_names = ', '.join(f'{guild.name}' for guild in client.guilds)

    report = discord.Embed()
    report.title = 'Archive.org status'
    report.color = 16753920 # orange

    rows = (
        ('Guild list', guild_names),
        ('Cached messages', str(len(client.cached_messages))),
        ('Private messages', str(len(client.private_channels))),
        ('Response messages', str(len(bot_state.handled_messages))),
    )
    for label, value in rows:
        report.add_field(name=label, value=value, inline=False)

    await send_dm(message.author, embed=report)
async def update_activity(bot_state, client, message=None):
    '''
    Sets the bot's presence to "watching <N> servers", where N is the
    number of guilds the client currently belongs to. ``bot_state`` and
    ``message`` are accepted but not used.
    '''
    server_count = len(client.guilds)
    watching = discord.Activity(
        status=discord.Status.online,
        type=discord.ActivityType.watching,
        name=f'{server_count} servers',
    )
    await client.change_presence(activity=watching)
def main(bot_state):
    '''
    Creates the Discord client, registers its event handlers and runs it
    with the token from ``bot_state.config['discordToken']``.

    Handlers registered:
      * on_ready / on_guild_join / on_guild_remove: refresh the presence.
      * on_message: dispatches the commands listed in ``possible_commands``.
      * on_reaction_add: runs the archive or repeat handler for the two
        recognised emoji.
    '''
    logger.info(msg=f'Starting bot...', extra={'guild': 'internal'})
    discordToken = bot_state.config['discordToken']
    client = discord.Client()

    # Maps the first word of a message to the name of the module-level
    # coroutine that handles it.
    possible_commands={
        '!archivestatus': 'status_command'
    }

    def guild_label(guild):
        # Builds the 'guild' logging field. The guild may be None (e.g. for
        # direct messages); dereferencing it inside an error handler would
        # raise a second exception and hide the original one.
        if guild is None:
            return 'direct'
        return f'{guild.id} | {guild.name}'

    @client.event
    async def on_ready():
        logger.info(msg=f'{client.user} has connected to Discord!', extra={'guild': 'internal'})
        await update_activity(bot_state, client)

    @client.event
    async def on_message(message):
        # Never react to the bot's own messages.
        if message.author == client.user:
            return

        guild = message.guild.id if message.guild is not None else 'direct'

        function = possible_commands.get(message.content.split(' ')[0])
        if function is None:
            return
        call_function = globals()[function]
        logger.debug(f'Calling {function}', extra={'guild': guild})
        await call_function(bot_state, client, message)

    @client.event
    async def on_reaction_add(reaction, user):
        if reaction.emoji == '🏛️':
            try:
                await handle_archive_react(extractor, reaction.message, user)
            except Exception as e:
                logger.error(msg=f'Error calling handle_archive_react, exception: {e}', extra={'guild': guild_label(reaction.message.guild)})
        elif reaction.emoji == '🔁':
            try:
                await handle_repeat_react(extractor, reaction.message, user)
            except Exception as e:
                logger.error(msg=f'Error calling handle_repeat_react, exception: {e}', extra={'guild': guild_label(reaction.message.guild)})

    @client.event
    async def on_guild_join(guild):
        logger.info(f'Joined guild {guild.name}', extra={'guild': f'{guild.id} | {guild.name}'})
        await update_activity(bot_state, client)

    @client.event
    async def on_guild_remove(guild):
        logger.info(f'Left guild {guild.name}', extra={'guild': f'{guild.id} | {guild.name}'})
        await update_activity(bot_state, client)

    client.run(discordToken)
if __name__ == '__main__':
    # Script entry point: builds the globals the handlers above rely on
    # (current_dir, bot_state, logger, extractor) and then starts the bot.
    current_dir = pathlib.Path(__file__).resolve().parent

    # Init state
    bot_state = BotState()
    config = bot_state.config
    time.tzset()

    # Set up logging to console and file. Every record must carry a 'guild'
    # entry in `extra`, because the format string references %(guild)s.
    logger = logging.getLogger('bot')
    formatter = logging.Formatter('%(asctime)s - %(guild)s - %(levelname)s - %(message)s')

    if config['logOutput'] in ("file", "both"):
        fh = logging.FileHandler(str(current_dir) + '/bot.log')
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    if config['logOutput'] in ("stdout", "both"):
        ch = logging.StreamHandler()
        ch.setFormatter(formatter)
        logger.addHandler(ch)

    # Set loglevel
    level_config = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warn': logging.WARNING,
        'error': logging.ERROR
    }

    if 'logLevel' in config:
        loglevel = config['logLevel']
        logger.setLevel(level_config[loglevel])
        logger.info(msg=f'Logging set to {config["logLevel"]}...', extra={'guild': 'internal'})
    else:
        logger.setLevel(logging.WARNING)
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(msg=f'Logging set to warn...', extra={'guild': 'internal'})

    if 'discordToken' not in config:
        logger.error(msg=f'\'discordToken\' is not set in config', extra={'guild': 'internal'})
        sys.exit(1)

    discordToken = config['discordToken']

    extractor = URLExtract()

    main(bot_state)