@StrGlee
2016-10-20T11:24:02.000000Z
字数 1653
阅读 1166
pyspider
# Python package requirements, one specifier per line (pip requirements-file format).
Flask>=0.10
Jinja2>=2.7
chardet>=2.2
cssselect>=0.9
lxml
pycurl
pyquery
requests>=2.2
tornado>=3.2
mysql-connector-python>=1.2.2
pika>=0.9.14
pymongo>=2.7.2
unittest2>=0.5.1
Flask-Login>=0.2.11
u-msgpack-python>=1.6
click>=3.3
SQLAlchemy>=0.9.7
six>=1.5.0
# Upper bound: 2.0 and later are excluded.
amqp>=1.3.0,<2.0
redis
kombu
psycopg2
elasticsearch
{
  "taskdb": "mysql+taskdb://username:password@host:port/taskdb",
  "projectdb": "mysql+projectdb://username:password@host:port/projectdb",
  "resultdb": "mysql+resultdb://username:password@host:port/resultdb",
  "message_queue": "amqp://username:password@host:port/%2F",
  "webui": {
    "username": "some_name",
    "password": "some_passwd",
    "need-auth": true
  }
}
# Launch each pyspider component as its own process; every command reads the same config.json.

# start **only one** scheduler instance
pyspider -c config.json scheduler

# phantomjs
pyspider -c config.json phantomjs

# start as many fetcher / processor / result_worker instances as you need
pyspider -c config.json --phantomjs-proxy="localhost:25555" fetcher
pyspider -c config.json processor
pyspider -c config.json result_worker

# start webui, set `--scheduler-rpc` if scheduler is not running on the same host as webui
pyspider -c config.json webui
; Supervisor-style process definitions for three pyspider components
; (fetcher, processor, phantomjs), collected into one group.
; NOTE(review): `group=` does not appear to be a documented [program:x] option
; in supervisor - confirm it is honored or intentionally ignored.

; Group containing the three programs defined below.
[group:pyspider]
programs=pyspider-fetcher,pyspider-processor,pyspider-phantomjs

; Fetcher component.
[program:pyspider-fetcher]
command=/usr/local/bin/pyspider -c /pyspider/config.json fetcher
autorestart=true
user=pyspider
group=pyspider
directory=/pyspider
stopasgroup=true

; Processor component.
[program:pyspider-processor]
command=/usr/local/bin/pyspider -c /pyspider/config.json processor
autorestart=true
user=pyspider
group=pyspider
directory=/pyspider
stopasgroup=true

; PhantomJS component.
[program:pyspider-phantomjs]
command=/usr/local/bin/pyspider -c /pyspider/config.json phantomjs
autorestart=true
user=pyspider
group=pyspider
directory=/pyspider
stopasgroup=true