-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
38 lines (35 loc) · 993 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import urllib.request, urllib.error
import pandas as pd
# Paths and column names used by the script.
INPUT_PATH = 'input.xlsx'
OUTPUT_PATH = 'output/file.xlsx'
URL_COLUMN = 'Website Url'
STATUS_COLUMN = 'Status'

# Seconds to wait on a single host before giving up; without a timeout one
# unresponsive server would stall the whole run.
DEFAULT_TIMEOUT = 10


def check_url(url, timeout=DEFAULT_TIMEOUT):
    """Request *url* and return its HTTP status, or a description of the failure.

    Args:
        url: The address to request. Anything that is not a string (for
            example the NaN pandas uses for an empty cell) is reported as
            invalid instead of being passed to ``urlopen``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The integer HTTP status code when the server answered (including
        error codes such as 404), otherwise a string describing why the
        request could not be completed.
    """
    if not isinstance(url, str):
        return 'Invalid URL: {!r}'.format(url)

    try:
        # The context manager closes the response so sockets are not leaked
        # while iterating over a long list of URLs.
        with urllib.request.urlopen(url, timeout=timeout) as conn:
            return conn.getcode()
    except urllib.error.HTTPError as e:
        # The server answered with an error status (e.g. 404, 501, ...).
        print('HTTPError: {}'.format(e.code))
        return e.code
    except urllib.error.URLError as e:
        # Not an HTTP-specific error (e.g. connection refused). ``reason``
        # may be an exception object, so store its text form.
        print('URLError: {}'.format(e.reason))
        return str(e.reason)
    except OSError as e:
        # Timeouts or connection resets raised outside of URLError.
        print('OSError: {}'.format(e))
        return str(e)
    except ValueError as e:
        # Malformed address, e.g. a URL without a scheme.
        print('ValueError: {}'.format(e))
        return str(e)


def main():
    """Check every URL listed in the input workbook and write the results.

    Reads the ``Website Url`` column of ``input.xlsx``, requests each URL and
    writes a two-column sheet (URL, status) to ``output/file.xlsx``.
    """
    import os  # local import: the file's top-level import lines stay untouched

    df = pd.read_excel(INPUT_PATH)

    data = []
    for url in df[URL_COLUMN].values:
        print(url)
        status = check_url(url)
        print(status)
        data.append([url, status])

    result = pd.DataFrame(data, columns=[URL_COLUMN, STATUS_COLUMN])

    # Create the output directory on first run instead of failing on write.
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

    # The context manager saves and closes the workbook even if writing fails.
    with pd.ExcelWriter(OUTPUT_PATH, engine='xlsxwriter') as writer:
        result.to_excel(writer, sheet_name='Sheet1', index=False)


if __name__ == '__main__':
    main()