3月 28

nagios 安装调试

[整理人:张子萌 2010-04]

需要准备软件如下:
centos

nagios-3.2.0
nagios-plugins-1.4.14

除了nagios还需要安装以下软件
apache2 安装路径为/usr/local/apache
perl
gcc 编译工作系统默认即可,如果没有安装推荐使用yum安装。
php nagios3.2页面需要php环境
yum install gcc glibc glibc-common
绘制图表需要安装以下包
freetype-2.3.5.tar.gz
libiconv-1.11.tar.gz
libmcrypt-2.5.7.tar.gz
libpng-1.2.10.tar.bz2
jpegsrc.v6b.tar.gz
gd-2.0.35.tar.gz
可以使用yum或者源码安装。
以上绘图包和apache安装可以参照:http://simon-zzm.blog.163.com/blog/static/88809522201028104721790/

1. 建组建帐号nagios

# groupadd nagios
# groupadd apache
# useradd -g nagios -G apache nagios
# useradd -g apache -G nagios apache

2. 编译安装nagios

# tar zxvf nagios-3.2.0.tar.gz
# cd nagios-3.2.0
# ./configure --prefix=/usr/local/nagios \
--with-command-group=nagios \
--with-httpd-conf=/usr/local/apache/conf/
# make all
# make install
# make install-init
# make install-config
# make install-commandmode
# make install-webconf

3. 安装nagios-plugins-1.4.14

# tar zxvf nagios-plugins-1.4.14.tar.gz
# cd nagios-plugins-1.4.14
# ./configure --prefix=/usr/local/nagios --with-nagios-user=nagios --with-nagios-group=nagios
# make
# make install

注:centos4 上安装时
在checking for redhat spopen problem…停住。
需要在./configure时添加 --enable-redhat-pthread-workaround 参数

4. 创建web登录nagios账号

# cd /usr/local/apache/bin
# ./htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin    #创建一个登录WEB的帐号

5. 配置nagios和apache整合
本文使用虚拟主机(httpd-vhosts.conf)配置。
# cd /usr/local/apache/conf/extra/
# vi httpd-vhosts.conf

配置apache的cgi ,在httpd-vhosts.conf后增加配置如下:

ScriptAlias /nagios/cgi-bin /usr/local/nagios/sbin

<Directory "/usr/local/nagios/sbin">
Options ExecCGI
AllowOverride None
Order allow,deny
Allow from all
AuthName "Nagios Access"
AuthType Basic
AuthUserFile /usr/local/nagios/etc/htpasswd.users
Require valid-user
</Directory>

Alias /nagios /usr/local/nagios/share

<Directory "/usr/local/nagios/share">
Options None
AllowOverride None
Order allow,deny
Allow from all
AuthName "Nagios Access"
AuthType Basic
AuthUserFile /usr/local/nagios/etc/htpasswd.users
Require valid-user
</Directory>

首先测试配置文件是否正确
# /usr/local/apache/bin/apachectl configtest

重启apache
# /usr/local/apache/bin/apachectl stop
# /usr/local/apache/bin/apachectl start

6. 启动nagios

测试nagios配置是否可用
# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

如果可用启动服务
# /etc/init.d/nagios start

现在你可以通过浏览器访问监控的界面了,http://x.x.x.x/nagios

7. 定义监控组

7.1 建立test监控组,监控192.168.1.103服务器。

在/usr/local/nagios/etc/目录下建立test目录,并定义主机.
监控192.168.1.103服务器的配置文件名为test_1_103.cfg.
# mkdir -p /usr/local/nagios/etc/test
# vi /usr/local/nagios/etc/test/testgroup.cfg
# 定义主机
define host{
use test-host
alias test Linux Server
address test_1_103; 如多个文件可以用逗号分开
}

7.2 建立监控文件
# vi /usr/local/nagios/etc/test/test_1_103.cfg
# 定义Ping远程Linux主机
define service{
use test-service ; 这个名字要和监控模板中一样,见7.3
host_name test;
service_description PING;
check_command check_ping!100.0,20%!500.0,60% ;check_ping命令在commands.cfg中定义,后跟两个参数,命令及参数间用!分割。
}
# 检查远程Linux主机根分区使用情况,必须安装nrpe并在/usr/local/nagios/etc/objects/commands.cfg中定义,见第8部分。
define service{
use generic-service ; Name of service template to use
host_name sectop
service_description Root Partition
check_command check_nrpe!check_disk_root
}
# 检查远程Linux主机的登录人数
define service{
use generic-service ; Name of service template to use
host_name sectop
service_description Current Users
check_command check_nrpe!check_users
}
# 检查远程Linux的主机的负载
define service{
use generic-service ; Name of service template to use
host_name sectop
service_description Current Load
check_command check_nrpe!check_load
}
# 检查远程Linux主机swap分区使用情况
define service{
use generic-service ; Name of service template to use
host_name sectop
service_description Swap Usage
check_command check_nrpe!check_swap
}
# 检查远程Linux主机的SSH服务
define service{
use generic-service ; Name of service template to use
host_name sectop
service_description SSH
check_command check_ssh
notifications_enabled 1 ;0为不发送告警信息 1为发送

}
# 检查远程Linux主机的HTTP服务
define service{
use generic-service ; Name of service template to use
host_name sectop
service_description HTTP
check_command check_http
notifications_enabled 1 ;0为不发送告警信息 1为发送
}

7.3 定义监控组的模板
# vi /usr/local/nagios/etc/objects/templates.cfg

define contact{
name test-contact ; The name of this contact template
service_notification_period 24x7 ; service notifications can be sent anytime
host_notification_period 24x7 ; host notifications can be sent anytime
service_notification_options w,u,c,r,f,s ; send notifications for all service states, flapping events, and scheduled downtime events
host_notification_options d,u,r,f,s ; send notifications for all host states, flapping events, and scheduled downtime events
service_notification_commands notify-service-by-email ; send service notifications via email
host_notification_commands notify-host-by-email ; send host notifications via email
register 0 ; DONT REGISTER THIS DEFINITION – ITS NOT A REAL CONTACT, JUST A TEMPLATE!
}

define host{
name test-host ; The name of this host template
notifications_enabled 1 ; Host notifications are enabled
event_handler_enabled 1 ; Host event handler is enabled
flap_detection_enabled 1 ; Flap detection is enabled
failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
notification_period 24x7 ; Send host notifications at any time
register 0 ; DONT REGISTER THIS DEFINITION – ITS NOT A REAL HOST, JUST A TEMPLATE!
}

define host{
name test-server ; The name of this host template
use test-host ; This template inherits other values from the generic-host template
check_period 24x7 ; By default, Linux hosts are checked round the clock
check_interval 5 ; Actively check the host every 5 minutes
retry_interval 1 ; Schedule host check retries at 1 minute intervals
max_check_attempts 10 ; Check each Linux host 10 times (max)
check_command check-host-alive ; Default command to check Linux hosts
notification_period workhours ; Linux admins hate to be woken up, so we only notify during the day
notification_interval 120 ; Resend notifications every 2 hours
notification_options d,u,r ; Only send notifications for specific host states
contact_groups admins ; Notifications get sent to the admins by default
register 0 ; DONT REGISTER THIS DEFINITION – ITS NOT A REAL HOST, JUST A TEMPLATE!
}

define service{
name test-service ; The ‘name’ of this service template
active_checks_enabled 1 ; Active service checks are enabled
passive_checks_enabled 1 ; Passive service checks are enabled/accepted
parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems)
obsess_over_service 1 ; We should obsess over this service (if necessary)
check_freshness 0 ; Default is to NOT check service ‘freshness’
notifications_enabled 1 ; Service notifications are enabled
event_handler_enabled 1 ; Service event handler is enabled
flap_detection_enabled 1 ; Flap detection is enabled
failure_prediction_enabled 1 ; Failure prediction is enabled
process_perf_data 1 ; Process performance data
retain_status_information 1 ; Retain status information across program restarts
retain_nonstatus_information 1 ; Retain non-status information across program restarts
is_volatile 0 ; The service is not volatile
check_period 24x7 ; The service can be checked at any time of the day
max_check_attempts 3 ; Re-check the service up to 3 times in order to determine its final (hard) state
normal_check_interval 10 ; Check the service every 10 minutes under normal conditions
retry_check_interval 2 ; Re-check the service every two minutes until a hard state can be determined
contact_groups admins ; Notifications get sent out to everyone in the ‘admins’ group
notification_options w,u,c,r ; Send notifications about warning, unknown, critical, and recovery events
notification_interval 60 ; Re-notify about service problems every hour
notification_period 24x7 ; Notifications can be sent out at any time
register 0 ; DONT REGISTER THIS DEFINITION – ITS NOT A REAL SERVICE, JUST A TEMPLATE!
}

7.4 修改告警模板
# vi /usr/local/nagios/etc/objects/contacts.cfg
define contact{
contact_name nagiosadmin ; Short name of user
use test-contact ; Inherit default values from generic-contact template (defined above)
alias Nagios Admin ; Full name of user
email admin@mail.com ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
}

define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagiosadmin
}

7.5 添加加载项

在nagios.cfg配置文件中开启对/usr/local/nagios/etc/test/加载
cfg_dir=/usr/local/nagios/etc/test
(注:cfg_dir只能指向目录,nagios会加载该目录下所有以.cfg结尾的文件,即上面建立的testgroup.cfg和test_1_103.cfg;如果要逐个指定文件,应改用 cfg_file=文件完整路径。)

测试nagios配置是否可用
# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

如果配置检查无误,重启服务
# /etc/init.d/nagios restart

8. 安装nrpe
nrpe用于和远程服务器交互使用。
nrpe分为两部分:一部分运行在nagios服务器端(check_nrpe插件),一部分运行在被监控的客户端(nrpe守护进程)。

8.1 nagios服务器上安装
#tar xzvf nrpe-2.12.tar.gz
#cd nrpe-2.12
#./configure
#make all
#make install-plugin #服务器端只要安装nrpe监控插件就行

在/usr/local/nagios/etc/objects/commands.cfg中定义check_nrpe命令
#vi /usr/local/nagios/etc/objects/commands.cfg
添加如下
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}

8.2 在要监控的客户端上
参照 1 中在客户服务器上建立nagios用户组和nagios用户
# tar xzvf nagios-plugins-1.4.14.tar.gz
# cd nagios-plugins-1.4.14
# ./configure --prefix=/usr/local/nagios
# make
# make install
# chown -R nagios:nagios /usr/local/nagios/

# tar xzvf nrpe-2.12.tar.gz
# cd nrpe-2.12
# ./configure --prefix=/usr/local/nagios
# make all
# make install-plugin
# make install-daemon
# make install-daemon-config
修改配置文件
# vi /usr/local/nagios/etc/nrpe.cfg #修改nrpe配置文件,允许Nagios监控服务器监控本机

allowed_hosts=127.0.0.1,192.168.0.19 #此处IP为监控服务器的地址,如多个地址可以使用逗号分隔

启动客户端
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
查看端口是否已经监听
#netstat -tln #查看端口
tcp 0 0 0.0.0.0:5666 0.0.0.0:*

添加随系统启动,启动nrpe客户端
#vi /etc/rc.d/rc.local
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

9. 设置被监控的windows服务器。
找了个NSClient++-windows*.msi的安装包,安装完毕修改配置,在nagios服务器也需要配置,此处不做详解。

附录:
1. 配置nagios及其plugins

/usr/local/nagios/etc下的文件
#控制cgi访问的配置文件
cgi.cfg

#Nagios主配置文件
nagios.cfg

#resource.cfg定义了一些变量,以便被其它文件引用,如$USER1$
resource.cfg

# objects是一个目录,用于定义Nagios对象
objects

/usr/local/nagios/etc/objects下的文件

#命令