Two ways to fake HTTP headers in Python using urllib2

  • 2020-04-02 13:47:53
  • OfStack

When scraping web pages, you often need to forge the request headers so that the scraping script can fetch pages reliably.

Below we use urllib2 to send custom request headers along with the request and collect the page content.

Method 1.


#!/usr/bin/python
# -*- coding: utf-8 -*-
# Filename: urllib2-header.py
 
import urllib2
import sys
 
# Fetch the page content, sending custom request headers (method 1)
url = "http://www.jb51.net"   # the scheme is required; urllib2 rejects protocol-relative URLs
send_headers = {
 'Host':'www.jb51.net',
 'User-Agent':'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 'Connection':'keep-alive'
}
 
req = urllib2.Request(url, headers=send_headers)
r = urllib2.urlopen(req)
 
html = r.read()                # the returned page content
receive_header = r.info()      # the response headers

# re-encode from UTF-8 to the local terminal encoding to avoid garbled output
html = html.decode('utf-8', 'replace').encode(sys.getfilesystemencoding())
 
print receive_header
# print '####################################'
print html
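
If you want to check which headers the Request object actually carries, and whether the request succeeded, a quick inspection like the one below should work (a minimal sketch, not from the original article, assuming Python 2.6+; header_items() lists the headers attached to req, and getcode() returns the HTTP status):


# Sketch: inspect the outgoing headers and the response status (Python 2.6+)
for name, value in req.header_items():   # headers attached to the Request
    print name, ':', value
print r.getcode()                         # HTTP status code of the response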

Method 2.


#!/usr/bin/python
# -*- coding: utf-8 -*-
# Filename: urllib2-header.py
 
import urllib2
import sys
 
url = 'http://www.jb51.net'   # the scheme is required; urllib2 rejects protocol-relative URLs
 
req = urllib2.Request(url)
req.add_header('Referer', 'http://www.jb51.net/')
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')
r = urllib2.urlopen(req)
 
html = r.read()                # the returned page content
receive_header = r.info()      # the response headers

html = html.decode('utf-8').encode(sys.getfilesystemencoding())  # avoid garbled console output
 
print receive_header
print '#####################################'
print html
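
Both examples are Python 2 only. In Python 3, urllib2 was merged into urllib.request; a rough equivalent of method 2 might look like the sketch below (not from the original article; note that the full http:// scheme is still required):


# Python 3 sketch: same idea with urllib.request (urllib2 no longer exists)
import urllib.request

url = 'http://www.jb51.net'
req = urllib.request.Request(url)
req.add_header('Referer', 'http://www.jb51.net/')
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')

r = urllib.request.urlopen(req)
print(r.info())                             # response headers
print(r.read().decode('utf-8', 'replace'))  # page content as text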

