-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxml-extraction.py
38 lines (31 loc) · 1.43 KB
/
xml-extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# The program prompts for a URL,
# reads the XML data from that URL using urllib,
# parses the XML and extracts the comment counts,
# then computes the sum of those counts and prints it.
import urllib.request, urllib.parse, urllib.error
import xml.etree.ElementTree as ET
import ssl
# Ignore SSL certificate errors.
# NOTE(review): turning off hostname checking and certificate verification
# leaves the download open to man-in-the-middle tampering. It is kept because
# the script does it deliberately; remove the two overrides below if the
# target hosts present valid certificates.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE


def fetch_xml(context: ssl.SSLContext):
    """Prompt for a URL until one can be downloaded.

    Args:
        context: SSL context passed to ``urllib.request.urlopen``.

    Returns:
        A ``(url, data)`` tuple: the location the user typed and the raw
        response body as bytes.

    Raises:
        EOFError: if standard input is closed while prompting.
        KeyboardInterrupt: if the user aborts; it is deliberately not caught
            so the retry loop can always be left with Ctrl-C.
    """
    # Local import: only needed for the exception type caught below.
    import http.client

    while True:
        # input() stays outside the try so that EOF or Ctrl-C ends the
        # program instead of being reported as an invalid URL forever.
        url = input('Enter location: ')
        try:
            # The with-statement closes the connection once the body is read.
            with urllib.request.urlopen(url, context=context) as response:
                return url, response.read()
        except (ValueError, OSError, http.client.HTTPException):
            # ValueError: malformed URL (e.g. unknown URL type).
            # OSError: covers urllib.error.URLError/HTTPError and socket errors.
            # HTTPException: covers InvalidURL and truncated responses.
            print('Invalid url. Try again.')


def sum_comment_counts(data):
    """Parse XML and total the ``count`` of every ``comments/comment`` element.

    Args:
        data: XML document as bytes or str.

    Returns:
        A ``(comment_count, total)`` tuple: the number of matching comment
        elements and the sum of their integer counts.

    Raises:
        xml.etree.ElementTree.ParseError: if ``data`` is not well-formed XML.
        ValueError: if a comment has no ``count`` value or it is not an
            integer.
    """
    root = ET.fromstring(data)
    comments = root.findall('comments/comment')

    total = 0
    for comment in comments:
        # findtext() gives None when <count> is absent and '' when it is
        # empty, which lets both cases be reported clearly instead of
        # failing with an AttributeError/TypeError.
        text = comment.findtext('count')
        if text is None or not text.strip():
            raise ValueError('comment element has no <count> value')
        total += int(text)
    return len(comments), total


def main():
    """Run the interactive script: fetch the XML, then report the totals."""
    url, data = fetch_xml(ctx)
    print('Retrieving', url)  # let the user know retrieval was successful
    # NOTE: data is bytes, so this is the byte length of the response.
    print('Retrieved', len(data), 'characters')

    try:
        comment_count, total = sum_comment_counts(data)
    except (ET.ParseError, ValueError) as err:
        # Exit with a readable message rather than a raw traceback.
        raise SystemExit('Could not extract comment counts: {}'.format(err))

    print('Comment count:', comment_count)
    print('The sum of all comments is:', total)


if __name__ == '__main__':
    main()