@Great-Chinese
2017-03-14T05:59:51.000000Z
字数 3548
阅读 1024
Linux监控--nagios
http://ask.apelearn.com/question/7155 # nagios监控搭建参考文献
Nagios是一款开源软件,可以监控网络设备、网络流量、Linux/Windows主机状态,甚至可以监控打印机。它支持web界面配置和管理操作,支持短信、邮件通知,还可以通过自定义脚本实现定制化监控。
# 首先在服务端(监控中心)安装epel扩展源
rpm -ivh http://www.lishiming.net/data/attachment/forum/epel-release-5-4_64.noarch.rpm
# 然后分别安装httpd nagios
yum install -y httpd nagios nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe
# 设置登录nagios后台的用户和密码
htpasswd -c /etc/nagios/passwd nagiosadmin
# 检测配置文件
nagios -v /etc/nagios/nagios.cfg
# 启动相关服务
service httpd restart; service nagios start
vim /etc/nagios/nagios.cfg # nagios配置文件
1,在客户端安装软件
# 首先在客户端机器上安装epel扩展源(如果安装了,就不需要再安装)
rpm -ivh http://www.lishiming.net/data/attachment/forum/epel-release-5-4_64.noarch.rpm
# 然后在客户端安装相关软件
yum install -y nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe
# 修改客户端配置文件
vim /etc/nagios/nrpe.cfg # 修改内容如下
# 后面的ip为服务端ip(注释需单独成行,nrpe.cfg不支持行尾注释)
allowed_hosts=127.0.0.1,192.168.31.127
dont_blame_nrpe=1
# 启动客户端
/etc/init.d/nrpe start
2,在监控中心(192.168.31.127)添加被监控主机(192.168.31.116)
# 在服务端进入此目录下
cd /etc/nagios/conf.d/
# 在服务端编辑此文件
vim 192.168.31.116.cfg # 增加内容如下
define host{
use linux-server
host_name 192.168.31.116
alias 0.12
address 192.168.31.116
}
define service{
use generic-service
host_name 192.168.31.116
service_description check_ping
check_command check_ping!100.0,20%!200.0,50%
max_check_attempts 5
normal_check_interval 1
}
define service{
use generic-service
host_name 192.168.31.116
service_description check_ssh
check_command check_ssh
max_check_attempts 5
normal_check_interval 1
}
define service{
use generic-service
host_name 192.168.31.116
service_description check_http
check_command check_http
max_check_attempts 5
normal_check_interval 1
}
# 当nagios检测到问题时,一共尝试检测5次都有问题才会告警,如果该数值为1,那么检测到问题立即告警
max_check_attempts 5
# 重新检测的时间间隔,单位是分钟,默认是3分钟
normal_check_interval 1
#在服务出现异常后故障一直没有解决,nagios再次对使用者发出通知的时间。单位是分钟。如果你认为,所有的事件只需要一次通知就够了,可以把这里的选项设为0
notification_interval 60
# 在服务端检测配置文件
nagios -v /etc/nagios/nagios.cfg
# 在服务端重启nagios
service nagios restart
3,在监控中心(192.168.31.127)继续添加被监控主机(192.168.31.116)
# 在服务端定义一个check_nrpe命令
vim /etc/nagios/objects/commands.cfg # 增加内容如下
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
# 然后在客户端定义command命令
vim /etc/nagios/nrpe.cfg # 增加内容如下
command[check_hda1]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda1
command[check_hda2]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda2
# 保存nrpe.cfg后,在客户端命令行手动执行下面的命令,查看运行是否正确(此行不要写入nrpe.cfg)
/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda2
# 然后在服务端进入此目录下
cd /etc/nagios/conf.d/
vim 192.168.31.116.cfg # 继续增加内容如下
define service{
use generic-service
host_name 192.168.31.116
service_description check_load
check_command check_nrpe!check_load
max_check_attempts 5
normal_check_interval 1
}
define service{
use generic-service
host_name 192.168.31.116
service_description check_disk_sda1
check_command check_nrpe!check_hda1
max_check_attempts 5
normal_check_interval 1
}
define service{
use generic-service
host_name 192.168.31.116
service_description check_disk_sda2
check_command check_nrpe!check_hda2
max_check_attempts 5
normal_check_interval 1
}
# 客户端重启nrpe
service nrpe restart
# 服务端重启nagios
service nagios restart
# 在服务端配置邮件告警联系人文件
vim /etc/nagios/objects/contacts.cfg # 增加内容如下
define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagiosadmin,123
}
define contact{
contact_name 123
use generic-contact
alias melody
email 244048927@qq.com
}
define contact{
contact_name 456
use generic-contact
alias gary
email 834865081@qq.com
}
# 然后在服务端的告警文件里面加上contact_groups
vim 192.168.31.116.cfg # 增加最后一句
define service{
use generic-service
host_name 192.168.31.116
service_description check_load
check_command check_nrpe!check_load
max_check_attempts 5
normal_check_interval 1
contact_groups admins ; 增加最后一句,组名须与contacts.cfg中的contactgroup_name一致