
Adds Champaca
captn3m0 committed Apr 1, 2024
1 parent 4626d40 commit 84e1bf4
Showing 3 changed files with 121 additions and 10 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -16,7 +16,7 @@ We either have URLs that can be easily scraped (https://schema.org/Event), or a
| https://events.venn.buzz/ | ✔️ | ✔️ || |
| https://linktr.ee/atta_galatta | ✔️ | ✔️ || |
| Zomato | ✔️ | ✔️ || |
-| Champaca | ✔️ | ✔️ | | |
+| Champaca | ✔️ | ✔️ | ✔️ | |
| [Visvesvaraya Museum][vism]. |||| OCR |
| [NGMA][ngma] |||| OCR The [older website calender](http://www.ngmaindia.gov.in/ngma_bangaluru_calendar.asp) is not updated. |
| [Sofar][sofar] | ✔️ | ✔️ | ✔️ | |
128 changes: 119 additions & 9 deletions src/champaca.py
@@ -2,17 +2,96 @@
import datetime
import json
import dateutil.parser
import re
from lxml import etree
import datefinder
import urllib.parse
from math import ceil

HEADERS = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}

def make_request(url):
    parsed_url = url.split("://")[1]
    host, path = parsed_url.split("/", 1)
    conn = http.client.HTTPSConnection(host)
-    conn.request("GET", "/" + path)
+    conn.request("GET", "/" + path, headers=HEADERS)
    response = conn.getresponse()
    return response.read()


def get_price(product_url):
    parsed_url = urllib.parse.urlparse(product_url)
    path = parsed_url.path
    j = json.loads(make_request("https://champaca.in" + path + ".json"))
    # return the price of the first variant, rounded up to whole rupees
    for variant in j["product"]["variants"]:
        return str(ceil(float(variant["price"])))


def guess_event_type(title):
    if "Workshop" in title:
        return "EducationEvent"
    if "Book" in title:
        return "LiteraryEvent"
    if "Children" in title:
        return "ChildrensEvent"
    return "Event"


# Generate as per the schema.org/Event specification
def make_event(title, starttime, description, url, product_urls):
    tz = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    # extract a performer name from titles like "… by Jane Doe | …" or "… with Jane Doe: …"
    performer_regexes = [
        r"by (?P<name>(\w|\s)+)\s(\||:)",
        r"with (?P<name>(\w|\s)+)\s(\||:)",
    ]
    performer = None
    for regex in performer_regexes:
        match = re.search(regex, title)
        if match:
            performer = match.group("name")
            break
    e = {
        "@context": "http://schema.org",
        "@type": guess_event_type(title),
        "name": title,
        "startDate": starttime.replace(tzinfo=tz).isoformat(),
        "duration": "PT2H",
        "description": description,
        "url": url,
        "offers": [
            {
                "@type": "Offer",
                "url": url,
                "price": get_price(url),
                "priceCurrency": "INR",
            }
            for url in product_urls
        ],
        "location": {
            "@type": "Place",
            "name": "Champaca Bookstore",
            "address": {
                "@type": "PostalAddress",
                "streetAddress": "7/1 Edward Road, Off Queens Road",
                "addressLocality": "Bengaluru",
                "postalCode": "560051",
                "addressCountry": "India",
            },
        },
    }

    for offer in e["offers"]:
        if offer["price"] == "0":
            e["isAccessibleForFree"] = True

    if performer:
        e["performer"] = {"@type": "Person", "name": performer}

    return e


def fetch_events():
url = "https://champaca.in/blogs/events.atom"
content = make_request(url)
@@ -23,26 +102,57 @@ def fetch_events():
    events = []

    # Iterate over each entry in the feed
-    for entry in tree.xpath('//xmlns:entry', namespaces={"xmlns": "http://www.w3.org/2005/Atom"})[0:5]:
-        title = entry.find('.//xmlns:title', namespaces={"xmlns": "http://www.w3.org/2005/Atom"}).text
+    for entry in tree.xpath(
+        "//xmlns:entry", namespaces={"xmlns": "http://www.w3.org/2005/Atom"}
+    )[0:5]:
+        title = entry.find(
+            ".//xmlns:title", namespaces={"xmlns": "http://www.w3.org/2005/Atom"}
+        ).text
        html_content = entry.find(
            ".//xmlns:content", namespaces={"xmlns": "http://www.w3.org/2005/Atom"}
        ).text
        # get all text from div or P elements
        description_text = " ".join(
            [
                p
                for p in etree.HTML(html_content).xpath(
                    "//div//text() | //p//text()"
                )
            ]
        )
        url = entry.find(
            ".//xmlns:link", namespaces={"xmlns": "http://www.w3.org/2005/Atom"}
        ).attrib["href"]

        doc = etree.HTML(html_content)
        links = doc.xpath(
            '//a[starts-with(@href, "https://champaca.in/products/")]/@href'
        )
        # Find future dates in the title
        future_dates = list(datefinder.find_dates(title, index=True, source=True))
        if future_dates:
            # Get the first future date found
-            future_date = next((date for date, idx, src in future_dates if date > datetime.datetime.now()), None)
+            future_date = next(
+                (
+                    date
+                    for date, idx, src in future_dates
+                    if date > datetime.datetime.now()
+                ),
+                None,
+            )
            if future_date:
                # Calculate the difference in days between now and the future date
                days_difference = (future_date - datetime.datetime.now()).days
                if days_difference <= 30 and days_difference >= 1:
-                    tz = datetime.timezone(datetime.timedelta(hours=5, minutes=30)) # Asia/Kolkata timezone
-                    events.append({
-                        "title": title,
-                        "starttime": future_date.replace(tzinfo=tz).isoformat(),
-                    })
+                    tz = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
+
+                    events.append(
+                        make_event(title, future_date, description_text, url, links)
+                    )

    return events


if __name__ == "__main__":
    # write to champaca.json
    with open("out/champaca.json", "w") as f:
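
A quick way to inspect the new output is the sketch below. It is illustrative only and assumes fetch_events()'s list is dumped to out/champaca.json as a JSON array (the json.dump call itself is truncated in the diff above).

import json

# read back the file written by src/champaca.py and print a summary of each event
with open("out/champaca.json") as f:
    events = json.load(f)

for event in events:
    # each entry is a schema.org/Event object built by make_event()
    print(event["@type"], event["startDate"], event["name"])
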
1 change: 1 addition & 0 deletions src/event-fetcher.py
@@ -11,6 +11,7 @@
    'out/scigalleryblr.json',
    'out/sumukha.json',
    'out/bluetokai.json',
+    'out/champaca.json',
]

KNOWN_EVENT_TYPES = [
