-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscanner.py
38 lines (28 loc) · 956 Bytes
/
scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
##
##
##
import re
import urllib
# Scan a dump of suspicious e-mails for URLs and sender/recipient mail
# domains, and write everything that is not whitelisted to a report file.
#
# Run as a script:  python scanner.py

# Input dump and report file. (An earlier revision read the dump from
# '/mnt/c/Users/.../Virusemails.txt' instead.)
INPUT_PATH = '/home/Virusemails.txt'
OUTPUT_PATH = '/home/PHURLresult.txt'

# http/https URLs. Raw string so that ``\(`` and ``\)`` reach the regex
# engine unchanged instead of being (invalid) string escapes.
# Note: ``[$-_@.&+]`` contains the ASCII *range* ``$`` .. ``_``, which is what
# lets ``/``, ``:``, ``?`` and ``=`` be part of a matched URL.
href_regex = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'

# Domain part of an e-mail address, including the leading ``@``:
# one or more ``label.`` groups followed by a final label of 2+ characters.
# The group is non-capturing so re.findall() returns the whole match, and
# there is no ``$`` anchor because addresses occur anywhere in the text.
href_regex_dns = r'@(?:[\w-]+\.)+[\w-]{2,}'

# Domains containing one of these fragments are treated as trusted.
whitelist = ['aol', 'analytik-jena', 'gmx', 'gmail', 'web']

# Characters removed from a matched domain before it is checked/reported.
# Built once here instead of once per loop iteration.
STRIP_TABLE = str.maketrans('', '', '*%,&@!;<>:()"')


def find_urls(text):
    """Return every http/https URL found in *text*, in order of appearance."""
    return re.findall(href_regex, text)


def find_mail_domains(text):
    """Return every ``@domain`` found in *text*, in order of appearance.

    Each item still carries its leading ``@``; use clean_domain() to strip it.
    """
    return re.findall(href_regex_dns, text)


def clean_domain(raw):
    """Return *raw* with ``@`` and the other STRIP_TABLE characters removed."""
    return raw.translate(STRIP_TABLE)


def is_whitelisted(domain):
    """Return True if any whitelist entry occurs as a substring of *domain*.

    NOTE(review): this is a plain substring test, so e.g. 'web' also matches
    'webmail.evil.example'. Kept as-is to preserve the existing report
    contents; consider matching whole domain labels instead.
    """
    return any(entry in domain for entry in whitelist)


def main():
    """Read the e-mail dump, print the findings and write the report.

    The report starts with every URL found, followed by every mail domain
    that is not whitelisted; each entry is preceded by a newline.
    Whitelisted domains are only printed, not written.

    Raises:
        OSError: if the input file cannot be read or the report cannot be
            written.
    """
    # Malicious mails may contain malformed bytes; replace them rather than
    # aborting the whole scan with a UnicodeDecodeError.
    with open(INPUT_PATH, encoding='utf-8', errors='replace') as f:
        text = f.read()

    urls = find_urls(text)
    urlsdns = find_mail_domains(text)
    print(urlsdns)

    # One open() in write mode replaces the former write-then-append pair;
    # the ``with`` block closes the file, so no explicit close() is needed.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        for item in urls:
            f.write("\n" + item)
        for item in urlsdns:
            domain = clean_domain(item)
            if is_whitelisted(domain):
                print(domain)
            else:
                f.write("\n" + domain)


if __name__ == "__main__":
    main()