init.py
"""init module.
Parses main awesome list to extract child urls to be parsed. Dumps JSON to external text file once done.
Author: Ankit Gyawali (https://github.com/ankitgyawali).
"""
import json
import re
import urllib.error
import urllib.request

# Local module providing parseUrl(), used below to parse each child list
import parse

# Parent URL
url = "https://raw.githubusercontent.com/sindresorhus/awesome/master/readme.md"
try:
    content = urllib.request.urlopen(url).read().decode("utf-8")
except urllib.error.URLError:
    print("Failed to parse root URL: " + url)
    raise
# Main Object that will hold all awesome data
root = {}
root['headers'] = []
# Split the readme on '##' markdown section headings
mainHeaders = content.split('##')[2:-1]
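# Note (added explanation, based on the readme's usual layout, which may change):
# split('##') yields something like [preamble, ' Contents...', ' Platforms...',
# ..., ' License...']; the [2:-1] slice drops the preamble, the table of
# contents, and the trailing chunk so only the content sections remain.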
# Loop over the sections and parse each one
for i, mainHeader in enumerate(mainHeaders):
    mainHeaders[i] = mainHeader.splitlines()
    if mainHeaders[i][0] != 'License' and mainHeaders[i][0] != 'Contributors':
        head = {}
        head['title'] = mainHeaders[i][0]
        head['titleDetails'] = []
        # Generate name/URL pairs for the linked awesome lists
        for j, headerLinks in enumerate(mainHeaders[i]):
            details = {}
            details['url'] = headerLinks.partition("(")[2].partition(")")[0]
            details['name'] = headerLinks.partition("[")[2].partition("]")[0]
            if details['url'] != '' and 'http' in details['url']:
                # IMPORTANT: This line does the parsing of all awesome URLs
                details['details'] = parse.parseUrl(details['url'])
                head['titleDetails'].append(details)
        root['headers'].append(head)
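# Illustrative example (not from the original file): a readme line such as
# "- [Node.js](https://github.com/sindresorhus/awesome-nodejs#readme)" would
# yield details['name'] == 'Node.js' and
# details['url'] == 'https://github.com/sindresorhus/awesome-nodejs#readme'
# from the partition() calls above.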
# Write the collected data to disk
with open('awesomewithdetails.json', 'w') as outfile:
    json.dump(root, outfile)
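
# Rough sketch (an assumption, not part of the original file) of the JSON
# written to awesomewithdetails.json; what goes into each "details" value
# depends entirely on parse.parseUrl and is not shown here:
#
# {
#   "headers": [
#     {
#       "title": "Platforms",
#       "titleDetails": [
#         {"name": "Node.js", "url": "https://github.com/...", "details": ...}
#       ]
#     },
#     ...
#   ]
# }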