Собственно, вот здесь я про неё заикнулся: http://web-brains.com/archives/35
Выкладываю ужасно сырую, практически без комментариев и абсолютно без документации версию этой либы. Она называется FancyCurl. Я написал её для того, чтобы было легче обращаться с интерфейсом библиотеки pycurl.
import pycurl
import urllib
import copy
import re
# TODO:
# Find out why SIGPIPE has to be ignored.
# Improve cookie support; maybe use a third-party library.
# The following piece of code was copied from elsewhere.
# We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
# the libcurl tutorial for more info.
try:
    import signal
    from signal import SIGPIPE, SIG_IGN
except ImportError:
    # The signal module (or SIGPIPE itself) is not available here,
    # so there is nothing to ignore.
    pass
else:
    try:
        signal.signal(SIGPIPE, SIG_IGN)
    except ValueError:
        # signal.signal() may only be called from the main thread; when this
        # module is first imported from another thread we skip the handler
        # instead of failing the whole import.
        pass
class Error(pycurl.error):
    """Network-level failure; a plain subclass of pycurl.error."""
class SiteError(Error):
    """Failure attributed to the remote resource itself.

    Raised, for example, when the name of the queried server cannot be
    resolved.
    """
def get(url):
    """Fetch ``url`` with a throwaway FancyCurl instance and return the body."""
    fetcher = FancyCurl()
    fetcher.setup({'url': url})
    fetcher.run()
    return fetcher.body
class FancyCurl:
    """Fancy wrapper for the pycurl library.

    Options are collected with setup() and applied to a fresh pycurl.Curl
    handle on every run(). After a request the result is available through
    these attributes:

        head      -- raw response header text as received
        body      -- response body (cut to ``maxsize`` when that is set)
        headers   -- dict of response headers parsed from ``head``
        cookies   -- dict of cookies parsed from Set-Cookie headers
        lastError -- pycurl error of the last failed request ('' if none)
    """

    # libcurl code for "a write callback reported an error". The body callback
    # returns 0 on purpose to stop a transfer early, so run() ignores it.
    _WRITE_ERROR = 23

    # libcurl codes that run() reports as SiteError:
    #  6 - could not resolve host
    # 47 - too many redirects
    # 52 - nothing was returned from the server
    # 58 - problem with the local client certificate
    # 59 - couldn't use specified cipher
    # 60 - problem with the CA cert (path? access rights?)
    _SITE_ERRORS = (6, 47, 52, 58, 59, 60)

    def __init__(self):
        self.timeout = 10
        self.logFile = None
        self.config = {}
        self._bodyCallbacks = []
        self.debug = False
        self.lastError = None
        self.freshPostData = False
        self.freshCookies = False
        self.oldUrl = None
        self.autoCookies = False
        self.head = ''
        self.body = ''
        self.headers = {}
        self.cookies = {}
        # Per-request state; _prepare() resets these before every request.
        # They are initialised here so the callbacks are usable on a fresh
        # instance as well.
        self.curl = None
        self.maxsize = 0
        self.nobody = False
        self.nohead = False

    def _bodyCallback(self, data):
        """Collect a chunk of the response body.

        Returns len(data) to continue the transfer, or 0 to make libcurl
        abort it: when the body is not wanted at all, when the collected body
        has grown past ``maxsize``, or when a user body callback returns a
        false value.
        """
        if self.nobody:
            return 0
        self.body = self.body + data
        if self.maxsize and len(self.body) > self.maxsize:
            return 0
        for callback in self._bodyCallbacks:
            if not callback(data):
                return 0
        return len(data)

    def _headCallback(self, data):
        """Collect one line of the response headers.

        Always returns len(data). With ``nohead`` set the line is dropped
        instead of stored; returning 0 here (as the code used to) makes
        libcurl abort the whole transfer at the first header line, so no body
        would ever be received.
        """
        if not self.nohead:
            self.head = self.head + data
        return len(data)

    def request(self):
        """Perform the transfer on the current handle, then close the handle.

        Unlike run(), this neither calls _prepare() nor _finish(); a handle
        must already exist in ``self.curl``.
        """
        self.curl.perform()
        self.curl.close()

    def setup(self, name, value=None):
        """Store request options; they are applied on the next run().

        Argument variants:
        1. name - option name, value - option value
        2. name is a dictionary of options, value is ignored

        Option names understood at run time: 'url', 'post', 'cookies',
        'cookiefile', 'logfile', 'proxy', 'timeout', 'connectTimeout',
        'referer', 'nobody', 'nohead', 'maxsize', 'redirect', 'userpwd',
        'bodyCallback', 'autocookies', 'useragent', 'headers', or an integer
        pycurl option constant.
        """
        if isinstance(name, dict):
            for key, item in name.items():
                self.setup(key, item)
            return
        # Remember that post data / cookies were given explicitly, so that
        # _prepare() does not reset them when the url changes.
        if 'post' == name:
            self.freshPostData = True
        if 'cookies' == name:
            self.freshCookies = True
        self.config[name] = value

    def _changeState(self, name, value):
        """Apply one option to the internal pycurl handle / wrapper state.

        Raises Exception("unknown option") for an unrecognised name.
        """
        if isinstance(name, dict):
            # Apply every entry directly. Going through setup() here would
            # only store the values and would modify self.config while
            # _prepare() is iterating over it.
            for key, item in name.items():
                self._changeState(key, item)
            return
        if isinstance(name, int):
            # Raw pycurl option constant: pass it straight through.
            self.curl.setopt(name, value)
            return

        if 'post' == name:
            self.curl.setopt(pycurl.POSTFIELDS, urllib.urlencode(value))
        elif 'cookiefile' == name:
            # Same file is used both to load and to save cookies.
            self.curl.setopt(pycurl.COOKIEJAR, value)
            self.curl.setopt(pycurl.COOKIEFILE, value)
        elif 'logfile' == name:
            self.logFile = value
        elif 'url' == name:
            self.curl.setopt(pycurl.URL, value)
        elif 'proxy' == name:
            self.curl.setopt(pycurl.PROXY, value)
        elif 'timeout' == name:
            self.curl.setopt(pycurl.TIMEOUT, value)
        elif 'connectTimeout' == name:
            self.curl.setopt(pycurl.CONNECTTIMEOUT, value)
        elif 'referer' == name:
            self.curl.setopt(pycurl.REFERER, value)
        elif 'cookies' == name:
            pairs = [urllib.quote_plus(key) + '=' + urllib.quote_plus(item) + ';'
                     for key, item in value.items()]
            self.curl.setopt(pycurl.COOKIE, ''.join(pairs))
        elif 'nobody' == name:
            # Deliberately "==": only True (or 1) switches the flag on.
            if value == True:
                self.nobody = True
        elif 'nohead' == name:
            if value == True:
                self.nohead = True
        elif 'maxsize' == name:
            self.maxsize = value
        elif 'redirect' == name:
            self.curl.setopt(pycurl.FOLLOWLOCATION, value)
        elif 'userpwd' == name:
            self.curl.setopt(pycurl.USERPWD, value)
        elif 'bodyCallback' == name:
            # A list/tuple replaces the callbacks, a single callable is added.
            if isinstance(value, (list, tuple)):
                self._bodyCallbacks = list(value)
            else:
                self._bodyCallbacks.append(value)
        elif 'autocookies' == name:
            self.autoCookies = value
        elif 'useragent' == name:
            self.curl.setopt(pycurl.USERAGENT, value)
        elif 'headers' == name:
            self.curl.setopt(pycurl.HTTPHEADER, value)
        else:
            raise Exception("unknown option")

    def _prepare(self):
        """Create a fresh pycurl handle and apply the stored configuration.

        Raises KeyError when no 'url' option has been set.
        """
        # Fail before a handle is allocated when no url was configured.
        url = self.config['url']

        self.curl = pycurl.Curl()
        # NOTE(review): peer and host verification are switched off, so TLS
        # certificates are not checked. Kept as in the original behaviour.
        self.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        self.curl.setopt(pycurl.SSL_VERIFYHOST, 0)
        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
        self.curl.setopt(pycurl.TIMEOUT, self.timeout)
        self.curl.setopt(pycurl.NOSIGNAL, 1)
        self.curl.setopt(pycurl.WRITEFUNCTION, self._bodyCallback)
        self.curl.setopt(pycurl.HEADERFUNCTION, self._headCallback)

        self.head = ''
        self.body = ''
        self.headers = {}
        self.cookies = {}
        self.maxsize = 0
        self.nobody = False
        self.nohead = False
        self.useragent = ''
        self.lastError = ''  # pycurl.CURLE_OK
        # The stored 'bodyCallback' option is re-applied below on every
        # request; start from an empty list so callbacks are not duplicated.
        self._bodyCallbacks = []

        try:
            for name, value in self.config.items():
                self._changeState(name, value)
        except Exception:
            # Do not leak the handle when an option cannot be applied.
            self.curl.close()
            raise

        # If we query a new url we must reset old post and cookie information
        # if it was not defined for the new url, because the old values are
        # still stored in self.config.
        if self.oldUrl != url:
            if not self.freshPostData:
                self.curl.setopt(pycurl.HTTPGET, 1)
            if not self.freshCookies:
                self.curl.setopt(pycurl.COOKIE, '')
        self.freshPostData = False
        self.freshCookies = False

    def run(self):
        """Do the request.

        Raises SiteError for the libcurl codes listed in _SITE_ERRORS and
        Error for any other pycurl failure; both carry the original
        (code, message) arguments. The "write error" code is ignored because
        the body callback triggers it on purpose to stop a transfer.
        """
        self._prepare()
        try:
            self.curl.perform()
        except pycurl.error as err:
            code = err.args[0]
            if code != self._WRITE_ERROR:
                self._finish()
                self.lastError = err
                if code in self._SITE_ERRORS:
                    raise SiteError(*err.args)
                raise Error(*err.args)
        self._finish()

    def runLoop(self, triesNumber, proxyRotator=None):
        """Call run() until it succeeds or the tries are used up.

        triesNumber  -- number of attempts; None means retry without limit
        proxyRotator -- optional object whose getNext() result is set as the
                        'proxy' option before every attempt

        A SiteError is re-raised immediately; any other Error is re-raised
        once the last attempt has failed.
        """
        while triesNumber is None or triesNumber:
            if proxyRotator:
                self.setup('proxy', proxyRotator.getNext())
            try:
                self.run()
                break
            except SiteError as err:
                print('error %s' % (err,))
                raise
            except Error as err:
                print('error %s' % (err,))
                # With triesNumber=None there is no counter to decrement.
                if triesNumber is not None:
                    triesNumber = triesNumber - 1
                    if not triesNumber:
                        raise

    def _parseHead(self):
        """Fill ``headers`` and ``cookies`` from the raw header text."""
        for line in re.split(r'\r?\n', self.head):
            name, separator, value = line.partition(': ')
            if not separator:
                # Status line or blank line: not a "Name: value" header.
                continue
            # Header names are case-insensitive.
            if name.lower() == 'set-cookie':
                # "[^;]*" so that an empty cookie value is stored as ''
                # rather than None.
                match = re.match(r'([^=]+)=([^;]*)', value)
                if match:
                    self.cookies[match.group(1)] = match.group(2)
            else:
                self.headers[name] = value

    def _finish(self):
        """Process the query result and close the pycurl handle."""
        try:
            self.oldUrl = self.config['url']
            if self.maxsize:
                self.body = self.body[0:self.maxsize]
            if self.logFile:
                with open(self.logFile, 'w') as log:
                    log.write(
                        self.config['url'] + '\n' +
                        self.curl.errstr() + '\n' +
                        self.head + '\n' + self.body)
            self._parseHead()
            if self.autoCookies:
                # NOTE(review): this replaces the configured cookies with the
                # ones from this response only (possibly none) - confirm that
                # merging with the previous cookies is not what is wanted.
                self.setup('cookies', self.cookies)
        finally:
            self.curl.close()

    def getinfo(self, option=None):
        """Return pycurl getinfo() data for the current handle.

        option -- pycurl info constant; defaults to pycurl.HTTP_CODE.

        The handle is closed by _finish() at the end of run(), so this is
        presumably only usable while a transfer is in progress (for example
        from a body callback) - TODO confirm.
        """
        if option is None:
            option = pycurl.HTTP_CODE
        return self.curl.getinfo(option)

    def errstr(self):
        """Return the error text of the current pycurl handle."""
        return self.curl.errstr()

    def getConfig(self, name):
        """Return the stored value of option ``name``, or '' if it is unset."""
        return self.config.get(name, '')