监控mysql从机同步状态脚本1.1

  之前写了一个检查mysql从机的脚本(http://www.simonzhang.net/?p=1823),但是在使用中发现一个问题:如果数据库被重启了,但同步没有启动,该脚本的检查结果仍然正常,不会进行报警,而数据实际上已经不再同步。
  我做了个调整,每次检查同步主机的pos,通过crontab进行调用,如果多次都没有变化则进行告警。如果10分钟调用一次,设为3次,就是半个小时内没有更新则报警。
crontab配置如下:
*/10 * * * * /bin/bash /script/check_mysql_slave/check_mysql_slave.sh start >/dev/null 2>&1
部分代码如下:

#!/usr/local/bin/python
# -*- coding:utf-8 -*-
# -------------------------------------------------------------------------------
# Filename:    check_nagios.py
# Revision:    1.1
# Date:        2013-06-24
# Author:      simonzhang
# Email:       simon-zzm@163.com
# -------------------------------------------------------------------------------
import os
import pexpect
import time
import smtplib
from email.mime.text import MIMEText

#### base settings
mysql_bin = '/program/mysql5/bin/mysql'
mysql_user = 'checkslavestatus'
mysql_pass = 'xxxxxxxxxx'
# Alert threshold: main() raises an alert once the stall counter exceeds
# this value.
max_error = 3
mail_host = 'smtp.exmail.qq.com'
mail_user = 'warning@xxx.net'
mail_pwd = 'xxxxxxxxx'
# NOTE(review): mail_warn() reads `mail_to`, which is not defined in this
# visible section, while `mail_cc` is not referenced by the visible code --
# confirm which recipient setting is intended.
mail_cc = "simon-zzm@163.com"
####

def mail_warn(error_ip):
    """Send a plain-text alert e-mail reporting a broken MySQL slave.

    Args:
        error_ip: Address of the affected host; it is interpolated into
            both the subject and the body of the message.

    The message is sent from ``mail_user`` through ``mail_host`` to
    ``mail_to``.  NOTE(review): ``mail_to`` is not defined in the visible
    settings section; it must exist at module level or this function raises
    NameError before anything is sent.

    Delivery is best-effort: SMTP/network failures are printed and
    swallowed so that a mail outage does not crash the cron job.
    """
    content = 'IP %s mysql slave is error!' % error_ip
    msg = MIMEText(content)
    msg['From'] = mail_user
    msg['Subject'] = 'mysql warnning %s' % error_ip
    msg['To'] = mail_to
    try:
        s = smtplib.SMTP()
        try:
            s.connect(mail_host)
            s.login(mail_user, mail_pwd)
            s.sendmail(mail_user, [mail_to], msg.as_string())
        finally:
            # Always release the socket, even when connect/login/sendmail
            # fails part-way through.
            s.close()
    except Exception as e:
        # `except ... as` and print(...) are valid on Python 2.6+ and 3.x.
        print(e)

def _read_state(path):
    """Return (last_master_pos, stall_count) saved by the previous run.

    Falls back to (0, 0) when the file is missing, unreadable or malformed.
    """
    try:
        with open(path, 'r') as state_file:
            fields = state_file.read().split(':')
        return int(fields[0]), int(fields[1])
    except (IOError, OSError, ValueError, IndexError):
        return 0, 0


def _parse_master_pos(status_lines):
    """Return Read_Master_Log_Pos from the slave-status output lines.

    Returns None when no such line is present, and 0 when the line is
    present but its value is empty or not an integer.
    """
    for line in status_lines:
        if 'Read_Master_Log_Pos: ' in line:
            try:
                return int(line.split(': ')[1])
            except (ValueError, IndexError):
                # Guard against an empty value.
                return 0
    return None


def _local_ip():
    """Return the first 'inet addr' address reported by ifconfig ('' if none)."""
    output = os.popen(
        "/sbin/ifconfig|grep 'inet addr'|awk '{print $2}'").read()
    lines = output.splitlines()
    if not lines:
        return ''
    # awk yields e.g. "addr:10.0.0.1" per interface; keep the part after the
    # colon of the first line only.
    return lines[0].split(':', 1)[-1].strip()


def main():
    """Check whether the slave's Read_Master_Log_Pos has advanced.

    The position seen on the previous run and the number of consecutive
    runs without progress are persisted in ``MasterPos.txt`` (relative to
    the current working directory) as ``<pos>:<count>``.  When the position
    is unchanged the counter is incremented, otherwise it is reset to 0.
    Once the counter exceeds ``max_error`` an alert is sent via
    ``mail_warn`` with this host's IP address.

    If the status output contains no Read_Master_Log_Pos line, nothing is
    recorded and no alert is raised.
    """
    state_path = 'MasterPos.txt'
    error_context = ''
    # Load the master position recorded by the previous check.
    old_master_pos, error_count = _read_state(state_path)
    # Query the replication status.
    # NOTE: the password is passed on the mysql command line.
    status = os.popen("%s -u%s -p%s -e 'show slave status\\G'" %
                      (mysql_bin, mysql_user, mysql_pass)).readlines()
    new_master_pos = _parse_master_pos(status)
    if new_master_pos is not None:
        # Only an unchanged position counts as a stall; any progress resets
        # the counter.
        if new_master_pos == old_master_pos:
            error_count += 1
        else:
            error_count = 0
        with open(state_path, 'w') as state_file:
            state_file.write('%s:%s' % (new_master_pos, error_count))
        # Decide on the freshly updated counter, not the stale one.
        if error_count > max_error:
            error_context += 'slave error!'
    # Decide whether to alert.
    print(error_context)
    if error_context:
        mail_warn(_local_ip())

# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

源代码

发表评论

电子邮件地址不会被公开。 必填项已用*标注