本文主要介绍pycurl的基本使用方法,示例代码参考自官网的实例。
- 基本使用方法
# pycurl option and info reference.
# NOTE: this is a reference listing, not a runnable script: getheader, getbody
# and fileobj are placeholders the caller must define, and the getinfo()
# values are only meaningful once c.perform() has completed.
import pycurl

c = pycurl.Curl()  # create a Curl object

# --- Request options ---
# Connection-phase timeout in seconds; 0 selects libcurl's built-in default
# (300 s) rather than "do not wait".
c.setopt(pycurl.CONNECTTIMEOUT, 5)
# Timeout for the whole request, in seconds.
c.setopt(pycurl.TIMEOUT, 5)
# Progress meter switch: a non-zero value hides it, 0 shows it.
c.setopt(pycurl.NOPROGRESS, 0)
# Maximum number of HTTP redirects to follow.
c.setopt(pycurl.MAXREDIRS, 5)
# Force the connection to be closed after the transfer instead of reused.
c.setopt(pycurl.FORBID_REUSE, 1)
# Force a brand-new connection instead of taking one from the cache.
c.setopt(pycurl.FRESH_CONNECT, 1)
# How long resolved DNS entries are cached, in seconds (libcurl default: 60).
c.setopt(pycurl.DNS_CACHE_TIMEOUT, 60)
# URL to request.
c.setopt(pycurl.URL, "http://www.baidu.com")
# User-Agent request header.
c.setopt(pycurl.USERAGENT, "Mozilla/5.2 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50324)")

# --- Where the response goes ---
# Pass each response header line to the callback getheader.
c.setopt(pycurl.HEADERFUNCTION, getheader)
# Pass each chunk of the response body to the callback getbody.
c.setopt(pycurl.WRITEFUNCTION, getbody)
# Alternatively, write the response headers to the file object fileobj.
c.setopt(pycurl.WRITEHEADER, fileobj)
# Alternatively, write the response body to the file object fileobj.
c.setopt(pycurl.WRITEDATA, fileobj)

# --- Transfer information ---
c.getinfo(pycurl.HTTP_CODE)           # HTTP status code of the response
c.getinfo(pycurl.TOTAL_TIME)          # total time of the transfer
c.getinfo(pycurl.NAMELOOKUP_TIME)     # time from start until DNS resolution finished
c.getinfo(pycurl.CONNECT_TIME)        # time from start until the connection was established
c.getinfo(pycurl.PRETRANSFER_TIME)    # time from start until the transfer was about to begin
c.getinfo(pycurl.STARTTRANSFER_TIME)  # time from start until the first byte was received
c.getinfo(pycurl.REDIRECT_TIME)       # time spent on redirects
c.getinfo(pycurl.SIZE_UPLOAD)         # number of bytes uploaded
c.getinfo(pycurl.SIZE_DOWNLOAD)       # number of bytes downloaded
c.getinfo(pycurl.SPEED_DOWNLOAD)      # average download speed
c.getinfo(pycurl.SPEED_UPLOAD)        # average upload speed
c.getinfo(pycurl.HEADER_SIZE)         # size of the received HTTP headers

# - python2
# Python 2: download a page into an in-memory buffer and print it.
import pycurl
from StringIO import StringIO

buffer = StringIO()
c = pycurl.Curl()
try:
    c.setopt(c.URL, 'http://www.pythontab.com/')
    # Collect the response body in the buffer.
    c.setopt(c.WRITEDATA, buffer)
    c.perform()
finally:
    # Release the handle even when perform() raises.
    c.close()

# On Python 2 the body is a byte string and can be printed as is.
body = buffer.getvalue()
print(body)

# - python3
# Python 3: download a page into an in-memory buffer and print it.
import pycurl
from io import BytesIO

buffer = BytesIO()
c = pycurl.Curl()
try:
    c.setopt(c.URL, 'http://www.pythontab.com/')
    # Collect the response body (bytes) in the buffer.
    c.setopt(c.WRITEDATA, buffer)
    c.perform()
finally:
    # Release the handle even when perform() raises.
    c.close()

# The body is a byte string on Python 3 and has to be decoded before it is
# printed as text; iso-8859-1 is assumed here.
body = buffer.getvalue()
print(body.decode('iso-8859-1'))

# - PycURL: automatic cookie handling
# Let libcurl read and store cookies automatically (Python 2 and 3).
import pycurl
try:
    from io import BytesIO
except ImportError:
    from StringIO import StringIO as BytesIO

url = "http://www.google.com/"
cookie_file = "cookie_file_name"

body_buffer = BytesIO()
crl = pycurl.Curl()
try:
    crl.setopt(pycurl.VERBOSE, 1)
    crl.setopt(pycurl.FOLLOWLOCATION, 1)
    crl.setopt(pycurl.MAXREDIRS, 5)
    crl.setopt(pycurl.URL, url)
    crl.setopt(pycurl.WRITEFUNCTION, body_buffer.write)

    # curl option -b/--cookie <name=string/file>: cookie string or file to
    # read cookies from.
    # Note: must be a string, not a file object.
    crl.setopt(pycurl.COOKIEFILE, cookie_file)

    # curl option -c/--cookie-jar <file>: write cookies to this file after
    # the operation.
    # Note: must be a string, not a file object.
    crl.setopt(pycurl.COOKIEJAR, cookie_file)

    crl.perform()
finally:
    # libcurl writes the cookie jar when the handle is cleaned up, so close
    # it explicitly instead of waiting for garbage collection.
    crl.close()

# The body is bytes; decode it before printing (iso-8859-1 assumed).
print(body_buffer.getvalue().decode('iso-8859-1'))

# - PycURL: sending a POST request
# Send a urlencoded POST request with PycURL (Python 2 and 3).
import pycurl
try:
    from io import BytesIO
except ImportError:
    from StringIO import StringIO as BytesIO
try:
    # python 3
    from urllib.parse import urlencode
except ImportError:
    # python 2
    from urllib import urlencode

url = "http://www.google.com/"
post_data_dic = {"name": "value"}

body_buffer = BytesIO()
crl = pycurl.Curl()
try:
    crl.setopt(pycurl.VERBOSE, 1)
    crl.setopt(pycurl.FOLLOWLOCATION, 1)
    crl.setopt(pycurl.MAXREDIRS, 5)
    # Optional: crl.setopt(pycurl.AUTOREFERER, 1)

    crl.setopt(pycurl.CONNECTTIMEOUT, 60)
    crl.setopt(pycurl.TIMEOUT, 300)
    # Optional: crl.setopt(pycurl.PROXY, proxy)
    # NOTE(review): proxy tunnelling presumably has no effect unless a proxy
    # is configured with the PROXY option above -- confirm.
    crl.setopt(pycurl.HTTPPROXYTUNNEL, 1)
    # Optional: crl.setopt(pycurl.NOSIGNAL, 1)

    crl.setopt(pycurl.USERAGENT, "dhgu hoho")

    # curl option -d/--data <data>: HTTP POST data, already urlencoded.
    crl.setopt(pycurl.POSTFIELDS, urlencode(post_data_dic))

    crl.setopt(pycurl.URL, url)
    crl.setopt(pycurl.WRITEFUNCTION, body_buffer.write)
    crl.perform()
finally:
    # Release the handle even when perform() raises.
    crl.close()

# The body is bytes; decode it before printing (iso-8859-1 assumed).
print(body_buffer.getvalue().decode('iso-8859-1'))

# - urllib: setting a timeout
import socket

# Set the process-wide default timeout (in seconds) for socket objects
# created from now on; urllib requests that do not pass an explicit timeout
# fall back to this default.
socket.setdefaulttimeout(5.0)

# - Decoding the body using the encoding sent by the server
# Fetch a page, collect its response headers and decode the body with the
# charset announced in the Content-Type header.
import pycurl
import re
try:
    from io import BytesIO
except ImportError:
    from StringIO import StringIO as BytesIO

# Response headers, keyed by lowercased header name.
headers = {}


def header_function(header_line):
    """Record one raw response header line in the module-level ``headers``.

    pycurl calls this once per header line, passing the line as bytes.
    Lines without a colon (such as the status line) are skipped.
    """
    # HTTP standard specifies that headers are encoded in iso-8859-1.
    # On Python 2, decoding step can be skipped.
    # On Python 3, decoding step is required.
    header_line = header_line.decode('iso-8859-1')

    # Header lines include the first status line (HTTP/1.x ...).
    # Ignore every line that has no colon in it.
    # This will botch headers that are split on multiple lines...
    if ':' not in header_line:
        return

    # Break the header line into header name and value.
    name, value = header_line.split(':', 1)

    # Header lines include the trailing newline, and there may be whitespace
    # around the colon, so strip both parts.
    name = name.strip()
    value = value.strip()

    # Header names are case insensitive; store them lowercased.
    name = name.lower()

    # Note: this only works when headers are not duplicated; a repeated
    # header overwrites the earlier value.
    headers[name] = value


buffer = BytesIO()
c = pycurl.Curl()
try:
    c.setopt(c.URL, 'http://pycurl.io')
    c.setopt(c.WRITEFUNCTION, buffer.write)
    # Set our header function.
    c.setopt(c.HEADERFUNCTION, header_function)
    c.perform()
finally:
    # Release the handle even when perform() raises.
    c.close()

# Figure out what encoding was sent with the response, if any.
# Check against lowercased header name.
encoding = None
if 'content-type' in headers:
    content_type = headers['content-type'].lower()
    # Raw string avoids the invalid "\S" escape; optional quotes and a
    # trailing ";" are kept out of the captured charset name.
    match = re.search(r'charset=["\']?([^\s;"\']+)', content_type)
    if match:
        encoding = match.group(1)
        print('Decoding using %s' % encoding)
if encoding is None:
    # Default encoding for HTML is iso-8859-1.
    # Other content types may have different default encoding,
    # or in case of binary data, may have no encoding at all.
    encoding = 'iso-8859-1'
    print('Assuming encoding is %s' % encoding)

body = buffer.getvalue()
# Decode using the encoding we figured out.
print(body.decode(encoding))

# - Writing to a file
import pycurl

# As long as the file is opened in binary mode, both Python 2 and Python 3
# can write the response body to it without decoding.
with open('out.html', 'wb') as f:
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, 'http://pycurl.io/')
        # Write the response body straight into the open file.
        c.setopt(c.WRITEDATA, f)
        c.perform()
    finally:
        # Release the handle even when perform() raises.
        c.close()

# - Following redirects
import pycurl

c = pycurl.Curl()
try:
    # Redirects to https://www.python.org/.
    c.setopt(c.URL, 'http://www.python.org/')
    # Follow redirect.
    c.setopt(c.FOLLOWLOCATION, True)
    c.perform()
finally:
    # Release the handle even when perform() raises.
    c.close()

# - Inspecting the response
# Perform a request and print the status code and the elapsed time.
import pycurl
try:
    from io import BytesIO
except ImportError:
    from StringIO import StringIO as BytesIO

buffer = BytesIO()
c = pycurl.Curl()
try:
    c.setopt(c.URL, 'http://pycurl.io/')
    c.setopt(c.WRITEDATA, buffer)
    c.perform()

    # getinfo must be called before close.
    # HTTP response code, e.g. 200.
    print('Status: %d' % c.getinfo(c.RESPONSE_CODE))
    # Elapsed time for the transfer (label fixed: this is a time, not a status).
    print('Time: %f' % c.getinfo(c.TOTAL_TIME))
finally:
    # Release the handle even when perform() raises.
    c.close()

# - post
# To send form data, use the POSTFIELDS option. The form data must be
# URL-encoded beforehand.
import pycurl
try:
    # python 3
    from urllib.parse import urlencode
except ImportError:
    # python 2
    from urllib import urlencode

c = pycurl.Curl()
try:
    c.setopt(c.URL, 'https://httpbin.org/post')

    post_data = {'field': 'value'}
    # Form data must be provided already urlencoded.
    postfields = urlencode(post_data)
    # Sets request method to POST,
    # Content-Type header to application/x-www-form-urlencoded
    # and data to send in request body.
    c.setopt(c.POSTFIELDS, postfields)

    c.perform()
finally:
    # Release the handle even when perform() raises.
    c.close()

# - File upload
# To upload a file, use the HTTPPOST option. To upload a physical file,
# use FORM_FILE.
import pycurl

c = pycurl.Curl()
try:
    c.setopt(c.URL, 'https://httpbin.org/post')
    c.setopt(c.HTTPPOST, [
        ('fileupload', (
            # upload the contents of this file
            c.FORM_FILE, __file__,
        )),
    ])
    c.perform()
finally:
    # Release the handle even when perform() raises.
    c.close()

# If the file data is in memory, use FORM_BUFFER / FORM_BUFFERPTR.
c = pycurl.Curl()
try:
    c.setopt(c.URL, 'https://httpbin.org/post')
    c.setopt(c.HTTPPOST, [
        ('fileupload', (
            # file name reported to the server
            c.FORM_BUFFER, 'readme.txt',
            # in-memory content sent as the file body
            c.FORM_BUFFERPTR, 'This is a fancy readme file',
        )),
    ])
    c.perform()
finally:
    # Release the handle even when perform() raises.
    c.close()
评论
发表评论