Detailed Explanation of Python PycURL Attributes and Methods, with Examples


The pycurl package is a Python interface to libcurl. It is written in C, and is powerful and fast. Because pycurl has so many properties and methods, this blog post records the most commonly used ones.

Normal installation

pip install pycurl

If the installation fails, search for the installation method for your specific system version, for example "install pycurl on CentOS 7.1".

General request method

import pycurl
import urllib.parse
from io import BytesIO

# General request example: send a form-encoded POST with custom headers and
# collect the response body in an in-memory buffer.

url = 'http://www.baidu.com'

# Request headers, one "Name:value" string per header.
headers = [
    "User-Agent:Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3",
]

# Form fields that are URL-encoded and sent as the POST body.
data = {
    "cityListName": "",
    "trade": "",
}

c = pycurl.Curl()  # Create a Curl handle
# c.setopt(pycurl.REFERER, 'http://www.baidu.com/')  # Set the Referer header
c.setopt(pycurl.FOLLOWLOCATION, True)  # Follow redirects automatically
c.setopt(pycurl.MAXREDIRS, 5)  # Maximum number of redirects to follow
c.setopt(pycurl.CONNECTTIMEOUT, 60)  # Connection timeout, in seconds
c.setopt(pycurl.TIMEOUT, 120)  # Timeout for the whole transfer, in seconds
c.setopt(pycurl.ENCODING, 'gzip,deflate')  # Accept and decode gzip/deflate content
# c.setopt(c.PROXY, ip)  # Use a proxy
c.fp = BytesIO()  # Buffer that receives the response body
c.setopt(pycurl.URL, url)  # Set the URL
c.setopt(pycurl.HTTPHEADER, headers)  # Pass the request headers
c.setopt(pycurl.POST, 1)
# urllib.parse.urlencode is the Python 3 location of urlencode
# (urllib.urlencode only exists in Python 2).
c.setopt(pycurl.POSTFIELDS, urllib.parse.urlencode(data))  # Pass the POST data
c.setopt(c.WRITEFUNCTION, c.fp.write)  # Write received bytes into the buffer
c.perform()

code = c.getinfo(c.HTTP_CODE)  # HTTP status code of the response
html = c.fp.getvalue()  # Response body as bytes

print(c.getinfo(c.TOTAL_TIME))  # Total time of the transfer

GET Request Method

# GET example. Relies on `pycurl`, `BytesIO` and `url` from the general
# request example; `ua` is defined here because no earlier snippet defines it.
ua = "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3"

c = pycurl.Curl()  # Create a Curl handle
c.setopt(pycurl.FOLLOWLOCATION, True)  # Follow redirects automatically
c.setopt(pycurl.MAXREDIRS, 5)  # Maximum number of redirects to follow
c.setopt(pycurl.CONNECTTIMEOUT, 60)  # Connection timeout, in seconds
c.setopt(pycurl.TIMEOUT, 120)  # Timeout for the whole transfer, in seconds
c.setopt(pycurl.ENCODING, 'gzip,deflate')  # Accept and decode gzip/deflate content
# c.setopt(c.PROXY, ip)  # Use a proxy
c.fp = BytesIO()  # Buffer that receives the response body
c.setopt(pycurl.URL, url)  # Set the URL
c.setopt(pycurl.USERAGENT, ua)  # Set the User-Agent header
# c.setopt(pycurl.HTTPHEADER, self.headers)  # Pass custom request headers
c.setopt(c.WRITEFUNCTION, c.fp.write)  # Write received bytes into the buffer
c.perform()
code = c.getinfo(c.HTTP_CODE)  # HTTP status code of the response
html = c.fp.getvalue()  # Response body as bytes

POST Request Method

# POST example. Relies on `pycurl`, `urllib.parse`, `BytesIO`, `url` and
# `data` from the general request example; `ua` is defined here because no
# earlier snippet defines it.
ua = "Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3"

c = pycurl.Curl()  # Create a Curl handle
c.setopt(pycurl.FOLLOWLOCATION, True)  # Follow redirects automatically
c.setopt(pycurl.MAXREDIRS, 5)  # Maximum number of redirects to follow
c.setopt(pycurl.CONNECTTIMEOUT, 60)  # Connection timeout, in seconds
c.setopt(pycurl.TIMEOUT, 120)  # Timeout for the whole transfer, in seconds
c.setopt(pycurl.ENCODING, 'gzip,deflate')  # Accept and decode gzip/deflate content
# c.setopt(c.PROXY, ip)  # Use a proxy
c.fp = BytesIO()  # Buffer that receives the response body
c.setopt(pycurl.URL, url)  # Set the URL
c.setopt(pycurl.USERAGENT, ua)  # Set the User-Agent header
# c.setopt(pycurl.HTTPHEADER, headers)  # Pass custom request headers
c.setopt(pycurl.POST, 1)
c.setopt(pycurl.POSTFIELDS, urllib.parse.urlencode(data))  # URL-encoded POST body
c.setopt(c.WRITEFUNCTION, c.fp.write)  # Write received bytes into the buffer
c.perform()
code = c.getinfo(c.HTTP_CODE)  # HTTP status code of the response
html = c.fp.getvalue()  # Response body as bytes

Accessing HTTPS on Windows

On Windows, accessing HTTPS URLs requires a CA certificate bundle:

# Use the CA bundle shipped with the certifi package to verify HTTPS
# certificates. `c` is an existing pycurl.Curl handle.
import certifi
c.setopt(pycurl.CAINFO, certifi.where())  # certifi.where() gives the path of the CA bundle file

Get the final URL of the web page after multiple redirects

c.getinfo(pycurl.EFFECTIVE_URL)  # Get the final URL of the web page (after any redirects)
c.setopt(pycurl.COOKIEFILE, "cookie_file_etherscan")  # Read cookies from this file
c.setopt(pycurl.COOKIEJAR, "cookie_file_etherscan")  # Save cookies to this file when the handle is closed

Other attributes

Part of pycurl API:

# Each line illustrates one call; in real code, create a single Curl object
# and call these methods on that same object.
pycurl.Curl()  # Create a pycurl Curl object
pycurl.Curl().setopt(pycurl.URL, "http://www.google.com.hk")  # Set the URL
pycurl.Curl().setopt(pycurl.MAXREDIRS, 5)  # Set the maximum number of redirects
pycurl.Curl().setopt(pycurl.CONNECTTIMEOUT, 60)  # Connection timeout, in seconds
pycurl.Curl().setopt(pycurl.TIMEOUT, 300)  # Timeout for the whole transfer, in seconds
pycurl.Curl().setopt(pycurl.USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)")  # Emulate a browser
pycurl.Curl().perform()  # Perform the transfer (send the request and receive the server's response)
pycurl.Curl().getinfo(pycurl.HTTP_CODE)  # Get the HTTP status code, similar to the status attribute in urllib


pycurl.NAMELOOKUP_TIME  Domain name resolution time
pycurl.CONNECT_TIME  Time taken to connect to the remote server
pycurl.PRETRANSFER_TIME  Time from the start until just before the transfer begins
pycurl.STARTTRANSFER_TIME  Time from the start until the first byte is received
pycurl.TOTAL_TIME  Total time of the previous request
pycurl.REDIRECT_TIME  Time spent on redirects, if there were any
pycurl.HTTP_CODE  HTTP response code
pycurl.REDIRECT_COUNT  Number of redirects
pycurl.SIZE_UPLOAD  Uploaded data size
pycurl.SIZE_DOWNLOAD  Downloaded data size
pycurl.SPEED_UPLOAD  Upload speed
pycurl.HEADER_SIZE  Size of the received headers
pycurl.REQUEST_SIZE  Request size
pycurl.CONTENT_LENGTH_DOWNLOAD  Download content length
pycurl.CONTENT_LENGTH_UPLOAD  Upload content length
pycurl.CONTENT_TYPE  Content type
pycurl.RESPONSE_CODE  Response code
pycurl.SPEED_DOWNLOAD  Download speed
pycurl.INFO_FILETIME  Remote time (timestamp) of the retrieved file
pycurl.HTTP_CONNECTCODE  Response code of the last HTTP proxy CONNECT request

Reference document

http://pycurl.io/docs/latest/quickstart.html