cleanup zomato
captn3m0 committed Jul 15, 2024
1 parent 1c99a01 commit e11591a
Showing 3 changed files with 16 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/event-fetcher.py
@@ -202,6 +202,7 @@ def find_event(l):
     return (url, d)

     data = JsonLdExtractor().extract(r.text)
+    print(data)
     m = None
     for x in data:
         if x.get("@graph"):
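Context for the added print(data): JsonLdExtractor.extract() returns a list of JSON-LD objects, and some pages nest their entities under an "@graph" key, which is what the loop after the new print checks for. A minimal standalone sketch, assuming the extractor here is extruct's JsonLdExtractor (the HTML snippet is made up):

    # Not part of the commit; assumes extruct's JsonLdExtractor and a made-up page.
    from extruct.jsonld import JsonLdExtractor

    html = (
        '<html><body><script type="application/ld+json">'
        '{"@graph": [{"@type": "Event", "name": "Sample Event"}]}'
        "</script></body></html>"
    )

    data = JsonLdExtractor().extract(html)
    print(data)  # [{'@graph': [{'@type': 'Event', 'name': 'Sample Event'}]}]

    for x in data:
        if x.get("@graph"):
            for entity in x["@graph"]:  # entities sit one level down
                print(entity.get("@type"), entity.get("name"))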
6 changes: 2 additions & 4 deletions src/mmb.py
@@ -1,10 +1,9 @@
-from common.session import get_cached_session
 import json
-import datetime

+from requests import Session

 def fetch_events():
-    session = get_cached_session()
+    session = Session()
     payload = {
         "langId": "1",
         "filterData": json.dumps(
@@ -24,6 +23,5 @@ def fetch_events():
     for event in response.json()["eventItems"]:
         print(f"https://www.goethe.de/ins/in/en/ver.cfm?event_id={event['object_id']}")

-
 if __name__ == "__main__":
     fetch_events()
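The Goethe-Institut URLs printed above are built directly from the API's eventItems; a small standalone sketch of that mapping (the response shape is inferred from the f-string in the diff, and the object_id value is a made-up example):

    # Not part of the commit; the sample response below is hypothetical.
    def event_urls(event_items):
        for event in event_items:
            yield f"https://www.goethe.de/ins/in/en/ver.cfm?event_id={event['object_id']}"

    sample = {"eventItems": [{"object_id": "12345678"}]}
    for url in event_urls(sample["eventItems"]):
        print(url)  # https://www.goethe.de/ins/in/en/ver.cfm?event_id=12345678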
21 changes: 13 additions & 8 deletions src/zomato.py
@@ -9,10 +9,15 @@
 from common import USER_AGENT_HEADERS
 import os

+EVENT_URL_REGEX = r"(https:\/\/www\.zomato\.com\/events\/[\w|-]+-et\d+)"
+
 # Public Key, not-logged-in API key
 ZOMATO_API_KEY = os.environ.get("ZOMATO_PUBLIC_API_KEY")
 BASE_URL = "https://zoma.to/live-event/"
-KNOWN_BAD_EVENTS = ["43612"]  # SkyJumper Trampoline Park, too long
+KNOWN_BAD_EVENTS = [
+    "43612",  # SkyJumper Trampoline Park, too long
+    "44864",  # Wonderla Amusement Park
+]


 def fix_date(date_str):
@@ -25,10 +30,10 @@ def get_events(session, event_id):
     url = f"{BASE_URL}{event_id}"
     session.browser = "chrome"
     r = session.get(url, cache=True)
-    if "window.location.replace" in r.text:
-        redirect_url = r.text.split('window.location.replace("')[1].split('")')[0]
-        r = session.get(redirect_url, cache=True)
-    # print(r.__class__)
+    # search for event_url_regex in r.text
+    if re.search(EVENT_URL_REGEX, r.text):
+        event_url = re.search(EVENT_URL_REGEX, r.text).group(1)
+        r = session.get(event_url, cache=True)
     from bs4 import BeautifulSoup

     soup = BeautifulSoup(r.text, "html.parser")
@@ -96,14 +101,14 @@ def get_event_ids(session):
 if "ZOMATO_PUBLIC_API_KEY" not in os.environ:
     raise Exception("ZOMATO_PUBLIC_API_KEY not set")
 events = []
-limit = sys.argv[1] if len(sys.argv) > 1 else None
+limit = int(sys.argv[1]) if len(sys.argv) > 1 else 1000
 session = Fetch(cache={"serializer": "json"})
 for event_id in sorted(get_event_ids(session)):
     for event in get_events(session, event_id):
         print(event["url"])
         events.append(event)
-        if len(events) == limit:
-            break
+        if len(events) >= limit:
+            break

 if len(events) == 0:
     import sys
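The rewritten redirect handling in get_events() no longer string-splits window.location.replace(...); it searches the short-link response body for EVENT_URL_REGEX and follows the captured canonical URL. A quick standalone check of that regex (only the regex comes from the diff; the body and slug are made up):

    # Not part of the commit; EVENT_URL_REGEX is copied from the diff, the body is made up.
    import re

    EVENT_URL_REGEX = r"(https:\/\/www\.zomato\.com\/events\/[\w|-]+-et\d+)"

    body = '<script>window.location.replace("https://www.zomato.com/events/sample-music-fest-et43999")</script>'

    m = re.search(EVENT_URL_REGEX, body)
    if m:
        event_url = m.group(1)
        print(event_url)  # https://www.zomato.com/events/sample-music-fest-et43999
        # get_events() then fetches event_url and hands it to BeautifulSoup.

The limit change in the last hunk complements this: sys.argv values are strings, so the old len(events) == limit comparison never matched and the early break never fired; casting to int (with a default of 1000) and breaking on >= makes the cap effective.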
