2012-01-31 python tornado internet async
Пример асинхронного загрузчика сайтов на tornado и curl.
Пример для python2.7
# coding: utf8
import tornado
from tornado import httpclient
from tornado import gen
from functools import partial
import Queue
# Global IOLoop instance that runs every callback scheduled by this script.
gloop = tornado.ioloop.IOLoop.instance()
# Queue of URLs that have not been handed to process() yet.
qinput = Queue.Queue()
# Number of fetches currently in flight; incremented by worker(),
# decremented by process() when a fetch completes or fails.
process_count = 0
process_max = 10  # maximum number of simultaneous requests
# Fill the queue: 100 copies of the same sample URL.
for i in xrange(100):
    qinput.put('http://www.wikipedia.org/')
def data_process(data):
    """Process the body of one downloaded response.

    Placeholder hook: it currently ignores *data* and returns None.
    process() calls it with ``response.body`` after a successful fetch.
    """
    pass
@gen.engine
def process(url):
    """Fetch *url* asynchronously and pass the response body to data_process().

    Any exception raised while fetching or processing (including a fetch
    whose response carries an ``error``) is printed and otherwise ignored,
    so one bad URL does not stop the run.  In every case the in-flight
    counter is decremented and worker() is scheduled again so the freed
    slot can be refilled.
    """
    global process_count
    try:
        http_client = httpclient.AsyncHTTPClient()
        # Suspend this generator until the fetch callback delivers a response.
        response = yield gen.Task(http_client.fetch, str(url))
        if response.error:
            raise Exception(response.error)
        data_process(response.body)
    except Exception as e:
        # Best effort: report the failure and carry on with the other URLs.
        print(e)
    # Reached on success and on failure alike: release the slot, then let
    # worker() start the next queued URL (or stop the loop when done).
    process_count -= 1
    gloop.add_callback(worker)
def worker():
    """Start queued fetches up to the concurrency limit; stop the loop when done.

    Prints a progress line ("in flight / limit (queued)"), then moves URLs
    from ``qinput`` to process() until ``process_max`` fetches are in flight
    or the queue is empty.  Once the queue is empty and nothing is in
    flight, the IOLoop is stopped.
    """
    # Only process_count is rebound here; the other module globals are read.
    global process_count
    print('# %d / %d (%d)' % (process_count, process_max, qinput.qsize()))
    while process_count < process_max:
        if qinput.empty():
            break
        url = qinput.get_nowait()
        # Count the slot as taken before the fetch is actually started.
        process_count += 1
        gloop.add_callback(partial(process, url))
    if qinput.empty():
        if not process_count:
            # Nothing queued and nothing in flight: the run is complete.
            gloop.stop()
# Entry point of the script: announce the run, pick the curl-based client
# implementation, queue the first scheduling pass and run the loop.
print('start')
# Configure before the loop starts, i.e. before process() creates any client.
httpclient.AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
# First pass; later passes are queued by process() after each fetch.
gloop.add_callback(worker)
# Runs the loop; worker() calls gloop.stop() once all URLs are handled.
gloop.start()
print('finish')
method="GET", headers=None, body=None,
auth_username=None, auth_password=None,
connect_timeout=20.0, request_timeout=20.0,
if_modified_since=None, follow_redirects=True,
max_redirects=5, user_agent=None, use_gzip=True,
network_interface=None, streaming_callback=None,
header_callback=None, prepare_curl_callback=None,
proxy_host=None, proxy_port=None, proxy_username=None,
proxy_password='', allow_nonstandard_methods=False,
validate_cert=True, ca_certs=None,
allow_ipv6=None,
client_key=None, client_cert=None