Example: implementing an IP proxy pool in Python
- 2021-07-09 08:58:06
- OfStack
This article walks through an example of implementing an IP proxy pool in Python. It is shared here for your reference, as follows:
The proxy source being crawled is the Xici proxy site (xicidaili.com).
The pages are parsed with XPath, and each IP is verified with telnet. Valid IPs are written to a local txt file. Of course, they could also be written to Redis or MongoDB, or you could set up a monitoring program: when the number of IPs in the proxy pool runs low (for example, fewer than 20), it starts this script to fetch IPs again; the code of this script would need to be changed accordingly.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Version : 1.0
# @Time : 2018/10/23 Morning 10:40
# @Author : Yeoman
# @Description :
import urllib.request
import lxml.etree
import telnetlib
import os
# Request headers attached to every page fetch made by get_proxy().
# The User-Agent is a desktop Chrome string (presumably so the site treats
# the crawler like an ordinary browser -- not verifiable from this file).
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
}
def get_proxy(page_num):
    """Fetch one listing page from xicidaili.com and extract its proxies.

    Args:
        page_num: Page number substituted into the listing URL.

    Returns:
        A list of ``'ip:port'`` strings, one for each table row (after the
        first) that has both a second and a third ``<td>`` with text.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        OSError: If the connection times out while reading.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url = 'http://www.xicidaili.com/nn/{}'.format(page_num)
    req = urllib.request.Request(url, headers=headers)

    # The context manager closes the HTTP response even if read()/decode()
    # raises; the timeout keeps an unresponsive site from hanging the crawl.
    with urllib.request.urlopen(req, timeout=10) as response:
        html = response.read().decode('utf-8')

    selector = lxml.etree.HTML(html)
    rows = selector.xpath('//*[@id="ip_list"]//tr')

    proxy_list = []
    # The first row is skipped (presumably the table header). Each remaining
    # row is queried relative to itself instead of re-running an absolute
    # XPath over the whole document for every row.
    for row in rows[1:]:
        ip_cells = row.xpath('./td[2]/text()')
        port_cells = row.xpath('./td[3]/text()')
        if not ip_cells or not port_cells:
            # Row without an IP or port cell: skip it rather than crash.
            continue
        proxy_list.append(ip_cells[0] + ':' + port_cells[0])
    return proxy_list
# Check whether a proxy address is reachable
def test_proxy_ip_port(proxy_ip_port):
    """Return True if a TCP connection to the proxy can be opened.

    Args:
        proxy_ip_port: Proxy address as an ``'ip:port'`` string.

    Returns:
        True if a telnet connection to ``ip``/``port`` is established within
        10 seconds; False if the connection fails or the address has no
        ``':'`` separator.
    """
    print(' Current Agent ip : {}'.format(proxy_ip_port))
    ip_port = proxy_ip_port.split(':')
    if len(ip_port) < 2:
        # No port present: treat as unusable instead of raising IndexError.
        return False
    ip = ip_port[0]
    port = ip_port[1]

    # NOTE: telnetlib is deprecated since Python 3.11 and removed in 3.13.
    try:
        # Used as a context manager so the probe socket is closed right away.
        with telnetlib.Telnet(ip, port, timeout=10):
            return True
    except (OSError, ValueError, OverflowError):
        # Connection refused / timed out / unresolvable or invalid address.
        # Deliberately narrower than a bare ``except`` so Ctrl-C still works.
        return False
# Persist a working proxy to the local pool file
def write_ip(proxy_ip):
    """Append ``proxy_ip`` followed by a newline to ``./ip.txt``.

    The file is opened in append mode, so it is created if missing and
    existing entries are kept.
    """
    with open('./ip.txt', mode='a') as pool_file:
        pool_file.write(''.join((proxy_ip, '\n')))
# Remove the local pool file
def del_file():
    """Delete ``./ip.txt`` if it exists; do nothing when it is absent.

    The removal is attempted directly and a missing file is tolerated, which
    avoids the gap between an existence check and the actual removal.
    """
    file_path = './ip.txt'
    try:
        os.remove(file_path)
    except FileNotFoundError:
        # Nothing to delete.
        pass
def run(page_count=5, max_proxies=100):
    """Rebuild the local proxy pool file from freshly scraped proxies.

    Scrapes listing pages ``1..page_count``, probes the collected proxies
    and appends each reachable one to the pool file via ``write_ip``.
    The result of every probe is printed.

    Args:
        page_count: Number of listing pages to scrape, starting at page 1.
        max_proxies: Maximum number of scraped proxies to probe, taken from
            the front of the list; ``None`` probes all of them.
    """
    proxy_ip_port_list = []
    for page_num in range(1, page_count + 1):
        proxy_ip_port_list.extend(get_proxy(page_num))

    # Clear the previous pool only after the crawl succeeded, so a failed
    # fetch does not leave the caller without any pool file at all.
    del_file()

    # Slicing (rather than indexing 0..99) is safe when fewer proxies than
    # ``max_proxies`` were scraped.
    for proxy_ip_port in proxy_ip_port_list[:max_proxies]:
        is_valid = test_proxy_ip_port(proxy_ip_port)
        print(is_valid)
        if is_valid:
            write_ip(proxy_ip_port)
# Script entry point: run the crawl only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    run()
For more information about Python, please see these topics on this site: "Summary of Python Socket Programming Skills", "Summary of Python Regular Expression Usage", "Python Data Structure and Algorithm Tutorial", "Summary of Python Function Use Skills", "Summary of Python String Operation Skills", "Introduction and Advanced Classic Tutorial of Python" and "Summary of Python File and Directory Operation Skills".
I hope this article helps you with your Python programming.