-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathonlineToBib.py
59 lines (49 loc) · 1.62 KB
/
onlineToBib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import sys
import bibtexparser
import lxml.html
from datetime import date
from urllib.parse import urlparse
from urllib.request import urlopen #used to handle https with lxml
def generate_entry(url):
    """Build a bibtexparser entry dict describing the web page at *url*.

    The page is downloaded and parsed with lxml. The host part of the URL
    becomes the author and the suffix of the entry ID, the text of the
    page's ``<title>`` element becomes the title, and today's date fills
    the ``note`` and ``year`` fields.

    Args:
        url: Address of the page to cite.

    Returns:
        A dict with the keys 'author', 'title', 'url', 'note', 'year',
        'ENTRYTYPE' and 'ID'.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the page
            cannot be fetched. Parser errors from lxml are not caught
            either.
    """
    print(url)

    # lxml is handed an already-open response (the file imports urlopen to
    # handle https); the context manager closes the connection once the
    # document has been parsed.
    with urlopen(url) as response:
        parsed_html = lxml.html.parse(response)

    author = urlparse(url).netloc

    # The <title> element may be absent or empty, and its text often carries
    # surrounding newlines/indentation. Collapse all whitespace runs instead
    # of chopping a fixed number of trailing characters, which truncated
    # titles that did not end in a newline.
    title_element = parsed_html.find(".//title")
    raw_title = title_element.text if title_element is not None else None
    title = ' '.join((raw_title or '').split())
    if not title:
        # Fall back to the host name so the entry never has an empty title.
        title = author

    # Read the clock once so 'note' and 'year' cannot disagree.
    today = date.today()
    return {
        'author': author,
        'title': title,
        'url': url,
        'note': 'retrieved at ' + today.isoformat(),
        'year': str(today.year),
        'ENTRYTYPE': 'MISC',
        'ID': 'webpage:' + author,
    }
USAGE = (
    'Usage: onlineToBib.py <url>\n'
    'OR\n'
    'Usage: onlineToBib.py <inputfile> <outputfile>\n'
    'OR\n'
    'Usage: onlineToBib.py'
)


def main(argv=None):
    """Run the command-line tool.

    Three invocation forms are supported:

    * one argument (a URL): print a single BibTeX entry to stdout;
    * two arguments (input file, output file): read one URL per line from
      the input file and write all entries to the output file;
    * no arguments: same as the two-argument form, using 'urls.txt' and
      'out.bib'.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when too many arguments are
        given.
    """
    if argv is None:
        argv = sys.argv
    arglen = len(argv)

    if arglen > 3:
        # Stop here: previously the usage text was printed and the script
        # carried on into batch mode anyway.
        print(USAGE, file=sys.stderr)
        return 2

    if arglen == 2:
        db = bibtexparser.bibdatabase.BibDatabase()
        db.entries = [generate_entry(argv[1])]
        writer = bibtexparser.bwriter.BibTexWriter()
        writer.contents = ['comments', 'entries']
        writer.indent = ' '
        writer.order_entries_by = ('ENTRYTYPE', 'author', 'year')
        print(writer.write(db))
        return 0

    inputfile = 'urls.txt'
    outputfile = 'out.bib'
    if arglen == 3:
        inputfile = argv[1]
        outputfile = argv[2]

    db = bibtexparser.bibdatabase.BibDatabase()
    # Open the files the user asked for; the paths used to be hard-coded
    # here, so the command-line arguments were silently ignored.
    with open(inputfile, 'r') as url_file:
        for line in url_file:  # file must contain only urls separated by newlines
            # strip() instead of [:-1]: the last line may lack a trailing
            # newline, and blank lines must not be fetched as URLs.
            url = line.strip()
            if not url:
                continue
            db.entries.append(generate_entry(url))
    with open(outputfile, 'w') as bibtex_output_file:
        bibtexparser.dump(db, bibtex_output_file)
    return 0


if __name__ == '__main__':
    sys.exit(main())