@StrGlee
2016-10-20T19:24:02.000000Z
字数 1653
阅读 938
pyspider
# Python package requirements, one pip requirement specifier per line.
# Entries without a version constraint accept any available release.
Flask>=0.10
Jinja2>=2.7
chardet>=2.2
cssselect>=0.9
lxml
pycurl
pyquery
requests>=2.2
tornado>=3.2
mysql-connector-python>=1.2.2
pika>=0.9.14
pymongo>=2.7.2
unittest2>=0.5.1
Flask-Login>=0.2.11
u-msgpack-python>=1.6
click>=3.3
SQLAlchemy>=0.9.7
six>=1.5.0
# amqp is the only entry with an upper bound (must stay below 2.0).
amqp>=1.3.0,<2.0
redis
kombu
psycopg2
elasticsearch
{
    "taskdb": "mysql+taskdb://username:password@host:port/taskdb",
    "projectdb": "mysql+projectdb://username:password@host:port/projectdb",
    "resultdb": "mysql+resultdb://username:password@host:port/resultdb",
    "message_queue": "amqp://username:password@host:port/%2F",
    "webui": {
        "username": "some_name",
        "password": "some_passwd",
        "need-auth": true
    }
}
# Start exactly **one** scheduler instance.
pyspider -c config.json scheduler
# Start the phantomjs component; the fetcher below is pointed at it via --phantomjs-proxy.
pyspider -c config.json phantomjs
# Start as many fetcher / processor / result_worker instances as you need.
pyspider -c config.json --phantomjs-proxy="localhost:25555" fetcher
pyspider -c config.json processor
pyspider -c config.json result_worker
# Start the webui; set `--scheduler-rpc` if the scheduler is not running on the same host as the webui.
pyspider -c config.json webui
; Supervisor group bundling the three pyspider programs defined in this file,
; so they can be addressed together under the group name "pyspider".
[group:pyspider]
programs=pyspider-fetcher,pyspider-processor,pyspider-phantomjs
; pyspider fetcher worker, supervised and restarted automatically on exit.
[program:pyspider-fetcher]
; --phantomjs-proxy points the fetcher at the phantomjs component run by
; [program:pyspider-phantomjs]; this matches the address used when the fetcher
; is started by hand. Adjust host:port if phantomjs runs elsewhere.
command=/usr/local/bin/pyspider -c /pyspider/config.json --phantomjs-proxy=localhost:25555 fetcher
autorestart=true
; Run as the unprivileged pyspider account.
user=pyspider
; NOTE(review): `group` is not among supervisor's documented [program:x]
; options; confirm whether it has any effect here.
group=pyspider
directory=/pyspider
; Deliver the stop signal to the whole process group, not just the parent.
stopasgroup=true
; pyspider processor worker, supervised and restarted automatically on exit.
[program:pyspider-processor]
; What to run and where to run it.
command=/usr/local/bin/pyspider -c /pyspider/config.json processor
directory=/pyspider
; Run as the unprivileged pyspider account.
user=pyspider
; NOTE(review): `group` is not among supervisor's documented [program:x]
; options; confirm whether it has any effect here.
group=pyspider
; Lifecycle: restart on exit; deliver the stop signal to the whole process group.
autorestart=true
stopasgroup=true
; pyspider phantomjs component, supervised and restarted automatically on exit.
[program:pyspider-phantomjs]
; What to run and where to run it.
command=/usr/local/bin/pyspider -c /pyspider/config.json phantomjs
directory=/pyspider
; Run as the unprivileged pyspider account.
user=pyspider
; NOTE(review): `group` is not among supervisor's documented [program:x]
; options; confirm whether it has any effect here.
group=pyspider
; Lifecycle: restart on exit; deliver the stop signal to the whole process group
; so any child processes are stopped along with the parent.
autorestart=true
stopasgroup=true