-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathproxy.py
76 lines (66 loc) · 2.07 KB
/
proxy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#! /usr/bin/python
# coding='utf-8'
"""
Author: zhouzying
URL: www.zhouzying.cn
Date: 2018-11-11
"""
import requests
from bs4 import BeautifulSoup
import re
import http.client
def get_proxy():
    """
    Scrape the first page of xicidaili's domestic high-anonymity proxy list.

    Downloads ``http://www.xicidaili.com/nn/1``, parses it with BeautifulSoup
    (``html5lib`` parser) and builds one entry for every table row that
    contains an IPv4 address cell, a numeric port cell and an HTTP/HTTPS
    scheme cell. Rows missing any of the three (e.g. the header row) are
    skipped.

    :return: list of dicts with the keys ``'ip'``, ``'port'`` and ``'scheme'``;
        all values are strings and the scheme is lower-cased. The list is
        empty when the page contains no table.
    :raises requests.RequestException: if the download fails, times out, or
        the server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/46.0.2490.80 Safari/537.36"
    }
    # Domestic high-anonymity proxies.
    url = 'http://www.xicidaili.com/nn/1'
    # Without a timeout requests may block forever on an unresponsive server.
    r = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html5lib')

    table = soup.table
    if table is None or table.tbody is None:
        return []

    # Compiled once, outside the loop. Raw strings keep ``\d`` intact and the
    # dots are escaped so that only a literal '.' separates the octets.
    ip_re = re.compile(r"<td>(\d+\.\d+\.\d+\.\d+)</td>")
    port_re = re.compile(r"<td>(\d+)</td>")
    scheme_re = re.compile(r"<td>(HTTPS?)</td>")

    proies = []
    for tr in table.tbody.find_all_next('tr'):
        row_html = str(tr)
        ip = ip_re.search(row_html)
        port = port_re.search(row_html)
        scheme = scheme_re.search(row_html)
        # Skip incomplete rows rather than raising IndexError on them.
        if not (ip and port and scheme):
            continue
        proies.append({
            'ip': ip.group(1),
            'port': port.group(1),
            'scheme': scheme.group(1).lower(),
        })
    return proies
def verifyproxy(proxies):
    """
    Check which proxies can actually relay an HTTP request.

    For every entry, a GET request for ``http://www.baidu.com`` is sent
    through the proxy with a 5 second timeout and the response status line is
    read. A proxy counts as usable only when a response arrives and its
    status is in the 2xx/3xx range. The verdict for each proxy is printed.

    :param proxies: iterable of dicts that have at least the keys ``'ip'``
        and ``'port'``
    :return: list of the entries from *proxies* that were found usable, in
        input order
    """
    url = "http://www.baidu.com"
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/46.0.2490.80 Safari/537.36"
    }
    usable = []
    for item in proxies:
        ip = item['ip']
        port = item['port']
        conn = None
        try:
            conn = http.client.HTTPConnection(ip, port, timeout=5)
            conn.request(method='GET', url=url, headers=headers)
            # request() only sends; the proxy is proven to work only once a
            # response actually comes back.
            status = conn.getresponse().status
        except (OSError, http.client.HTTPException, ValueError, OverflowError):
            # Connection refused/reset, timeout, malformed reply, or a
            # host/port value that cannot be used at all.
            ok = False
        else:
            ok = 200 <= status < 400
        finally:
            # Always release the socket, whatever the outcome.
            if conn is not None:
                conn.close()
        if ok:
            print("代理可用:{}:{}".format(ip, port))
            usable.append(item)
        else:
            print("代理不可用:{}:{}".format(ip, port))
    return usable
# Script driver: scrape the proxy list, then probe every scraped entry.
# NOTE(review): these statements appear to sit at module level with no
# `if __name__ == "__main__":` guard, so they would also run on import — confirm.
proxies = get_proxy()
verifyproxy(proxies)