-
Notifications
You must be signed in to change notification settings - Fork 431
/
Copy pathcategory.py
92 lines (73 loc) · 2.61 KB
/
category.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
import re
import urllib
import urllib.parse

import requests
from bs4 import BeautifulSoup
# Master switch for disk output: when False the script only prints the
# generated category index; when True it also calls get_problems() for each
# category and writes categories/README.md.
write_to_file = False
def get_problems(cat_heading, url, *, timeout=30):
    """Scrape one category page and save its problems as a Markdown table.

    Downloads ``url``, takes the first ``<table>`` on the page and writes
    ``categories/<cat_heading>/README.md``.  The file starts with a
    ``# <cat_heading>`` title followed by a table whose first three columns
    are fixed (checkbox, ID, linked problem name); every source column from
    the third onwards is copied through as plain text.

    Args:
        cat_heading: Category title; also used as the directory name under
            ``categories``.
        url: Address of the category page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up, so a stalled connection cannot hang the script forever.

    Returns:
        None.  The result is the README file written to disk.

    Raises:
        IndexError: If the downloaded page contains no ``<table>``.
    """
    page = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(page, features="lxml")
    table = soup.find_all("table")[0]

    headings = table.find_all("th")
    # The first two source columns (ID, name) are replaced by our own three
    # fixed columns; the remaining source headings are kept verbatim.
    header_cells = ["| Checkbox | ID | Problem Name"]
    header_cells.extend("|{}".format(th.get_text()) for th in headings[2:])

    lines = [
        "# {}\n".format(cat_heading),
        "\n",
        "\n",
        "".join(header_cells) + "|\n",
        # One extra column compared to the source table: the checkbox.
        "|:---:" * (len(headings) + 1) + "|\n",
    ]

    # The first <tr> is skipped as the header row.
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        if len(cells) < 2:
            # Not a problem row (e.g. an empty or header-only row).
            continue

        problem_id = cells[0].get_text()
        name = cells[1].get_text()
        anchor = cells[1].find("a")
        if anchor is not None:
            name_cell = "[{}]({})".format(name, anchor.get("href"))
        else:
            # No hyperlink in the name cell: keep the plain name.
            name_cell = name

        row_cells = [
            "|<ul><li>- [ ] Done</li></ul>",
            "|{}|{}".format(problem_id, name_cell),
        ]
        row_cells.extend("|{}".format(td.get_text()) for td in cells[2:])
        lines.append("".join(row_cells) + "|\n")

    # Create the category directory on demand so a fresh checkout works.
    out_dir = os.path.join("categories", cat_heading)
    os.makedirs(out_dir, exist_ok=True)
    readme_path = os.path.join(out_dir, "README.md")
    # Explicit UTF-8: scraped names may contain non-ASCII characters.
    with open(readme_path, "w", encoding="utf-8") as readme:
        readme.write("".join(lines))
# ---------------------------------------------------------------------------
# Script body: download the category list, print a Markdown index of all
# categories and, when write_to_file is set, also write the index and one
# README per category to disk.
# ---------------------------------------------------------------------------
base_url = "https://www.a2oj.com/"
url = "https://www.a2oj.com/Categories.html"
data = requests.get(url, timeout=30).text
soup = BeautifulSoup(data, features="lxml")
tables = soup.find_all("table")

# Characters replaced by a space when deriving a directory name from a
# category name: comma, backslash, slash, ampersand, question mark, parens.
_unsafe_dir_chars = re.compile(r"[,\\/&?()]")
_space_runs = re.compile(r" +")

heading_text = "| Checkbox | ID | Category | Problems Count |\n"
index_lines = [
    # ATX headings need a space after '#', otherwise Markdown renders the
    # line as ordinary text.
    "# Categories\n",
    "\n",
    heading_text,
    "|---|:---:|:---:|---|\n",
]

# The first <tr> of the first table is skipped as the header row.
for row in tables[0].find_all("tr")[1:]:
    tds = row.find_all("td")
    if len(tds) < 3:
        # Not a category row (needs ID, name and problem count cells).
        continue

    cat_id = tds[0].get_text()
    name = tds[1].get_text()
    problems_count = tds[2].get_text()

    # Directory name: "<zero-padded id>. <name with unsafe chars removed and
    # spaces collapsed to underscores>".
    dir_name = _unsafe_dir_chars.sub(" ", name).rstrip()
    dir_name = _space_runs.sub("_", dir_name)
    dir_name = "{}. {}".format(cat_id.zfill(3), dir_name)

    # The link target is percent-encoded because dir_name contains a space.
    index_lines.append(
        "|<ul><li>- [ ] Done</li></ul>|{}|[{}]({}/README.md)|{}|\n".format(
            cat_id, name, urllib.parse.quote(dir_name), problems_count))

    # urljoin resolves relative, root-relative and absolute hrefs alike.
    category_link = urllib.parse.urljoin(
        base_url, tds[1].find("a").get("href"))
    print(dir_name)
    if write_to_file:
        # get_problems() writes its file and returns nothing, so there is
        # no result worth printing.
        get_problems(dir_name, category_link)

final_data = "".join(index_lines)
print(final_data)
if write_to_file:
    os.makedirs("categories", exist_ok=True)
    index_path = os.path.join("categories", "README.md")
    # Explicit UTF-8: scraped names may contain non-ASCII characters.
    with open(index_path, "w", encoding="utf-8") as readme:
        readme.write(final_data)
print("Done")