generated from ridlees/scraper-boilerplate.py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
94 lines (77 loc) · 2.54 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import asyncio
from pyppeteer.launcher import launch
from bs4 import BeautifulSoup
import sqlite3
from sender import send_email
from dotenv import load_dotenv
import os
from os.path import join, dirname
# Load the settings stored in the ".env" file that sits next to this script.
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

# Mail settings handed to send_email() by sender(); each is None when the
# corresponding environment variable is not set.
port = os.getenv("PORT")
smtp_server = os.getenv("SMTP_SERVER")
sender_email = os.getenv("SENDER_EMAIL")
password = os.getenv("PASSWORD")
email = os.getenv("RECEIVER_EMAIL")

# Path of the file that main() opens and reads line by line.
path_to_file = os.getenv("VENUES")
async def pyppetet(url):
    """Open ``url`` in a pyppeteer browser and return the page's HTML.

    After navigation the page is given a fixed pause before its content is
    read.

    Args:
        url: Address to navigate to.

    Returns:
        The markup returned by ``page.content()``.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto(url)
        # Fixed pause of 7000 (milliseconds per the pyppeteer API) before
        # the content is captured.
        await page.waitFor(7000)
        ## Get HTML
        return await page.content()
    finally:
        # Close the browser on every path, including failed navigation,
        # so that no browser instance is left open.
        await browser.close()
def compare_with_db(items, cur):
    """Return the entries of ``items`` that are not yet in the ``events`` table.

    Args:
        items: Iterable of event URLs to check.
        cur: Database cursor used to read the ``url`` column of ``events``.

    Returns:
        A list of the URLs that are absent from the table, in their original
        order and with repeats inside ``items`` removed.
    """
    rows = cur.execute("SELECT DISTINCT url FROM events").fetchall()
    # A set gives constant-time membership tests. It also collects the URLs
    # accepted during this call, so a link that occurs twice in ``items`` is
    # reported only once.
    seen = {row[0] for row in rows}
    fresh = []
    for item in items:
        if item not in seen:
            seen.add(item)
            fresh.append(item)
    return fresh
def save_to_db(items, cur):
    """Insert every URL in ``items`` as a new row of the ``events`` table.

    Nothing is committed here; the caller commits on its connection.

    Args:
        items: Iterable of event URLs to store.
        cur: Database cursor used for the inserts.
    """
    # One executemany() call replaces the per-item execute() loop; each URL
    # is wrapped in a 1-tuple to bind the single "?" placeholder.
    cur.executemany(
        "INSERT INTO events (url)\nVALUES (?);",
        ((item,) for item in items),
    )
def get_event_array(url):
    """Fetch ``url`` and collect the event links found in the rendered page.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of ``https://goout.net``-prefixed links, one for each
        ``<a class="title">`` that carries an ``href`` inside the first
        ``<div class="items row">``; ``None`` when the page contains no such
        ``<div>``.
    """
    html_response = asyncio.get_event_loop().run_until_complete(pyppetet(url))
    ## Load HTML Response Into BeautifulSoup
    soup = BeautifulSoup(html_response, "html.parser")
    # find() returns None when nothing matches. Indexing the result of
    # find_all() raised IndexError before the None check could ever run.
    items_row = soup.find("div", class_="items row")
    if items_row is None:
        return None
    items = []
    for anchor in items_row.find_all("a", class_="title"):
        href = anchor.get("href")
        # An anchor without an href would make the concatenation below fail
        # on None, so it is skipped.
        if href:
            items.append("https://goout.net" + href)
    return items
def sender(events):
    """Mail the given event URLs to the configured receiver.

    The message is assembled as raw text (From/To/Subject headers followed
    by the body) and handed to ``send_email`` together with the module-level
    mail settings.

    Args:
        events: Iterable of event URLs to list in the message body.
    """
    subject = "Goout Watchdog"
    text = "Ahoj Kris,\n\n Mám pro tebe nové akce:\n" + "".join(
        event + "\n\n" for event in events
    )
    # An empty line must separate the header block from the body; without
    # it the greeting is read as a continuation of the headers.
    # NOTE(review): the body contains non-ASCII characters - confirm that
    # send_email encodes the message accordingly.
    message = (
        f"From: {sender_email}\r\nTo: {email}\r\n"
        f"Subject:{subject}\r\n\r\n{text}"
    )
    send_email(port, smtp_server, sender_email, password, email, message)
def event_checker(url, cur, con):
    """Scrape ``url``, store the events not seen before and return them.

    Args:
        url: Address of the page to scrape.
        cur: Database cursor used to look up and insert event URLs.
        con: Database connection; committed after the new rows are inserted.

    Returns:
        A list of the event URLs that were newly stored. The list is empty
        when ``get_event_array`` yields ``None`` or when nothing is new.
    """
    items = get_event_array(url)
    if items is None:
        # Return an empty list rather than "" so that the result always has
        # the same type and callers can concatenate it with other lists.
        return []
    new_items = compare_with_db(items, cur)
    save_to_db(new_items, cur)
    con.commit()
    return new_items
def main():
    """Check every URL listed in the venues file and mail the new events.

    Each non-empty line of the file at ``path_to_file`` is treated as one
    URL and passed to ``event_checker`` together with a cursor and
    connection for ``events.db``. All newly found events are collected and,
    if there are any, sent in a single message via ``sender``.
    """
    con = sqlite3.connect("events.db")
    try:
        cur = con.cursor()
        new_items = []
        with open(path_to_file) as file:
            for line in file:
                # Drop the trailing newline and skip blank lines so that
                # only real URLs reach the scraper.
                url = line.strip()
                if not url:
                    continue
                print(url)
                new_items.extend(event_checker(url, cur, con))
    finally:
        # Release the database handle even when scraping fails.
        con.close()
    if new_items:
        print("sending")
        sender(new_items)
# Start the check only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()