6月 24

监控mysql从机同步状态脚本1.1

  之前写了个一个检查mysql从机的脚本(http://www.simonzhang.net/?p=1823),但是在使用中发现一个问题。如果数据库被重启了,但是同步的没有启动,此脚本检查还是正常,不会进行报警,数据不会同步。
  我做了个调整,每次检查同步主机的pos,通过crontab进行调用,如果多次都没有变化则进行告警。如果10分钟调用一次,设为3次,就是半个小时内没有更新则报警。
crontab配置如下:
*/10 * * * * /bin/bash /script/check_mysql_slave/check_mysql_slave.sh start >/dev/null 2>&1
部分代码如下:

#!/usr/local/bin/python
# -*- coding:utf-8 -*-
# -------------------------------------------------------------------------------
# Filename:    check_nagios.py
# Revision:    1.1
# Date:        2013-06-24
# Author:      simonzhang
# Email:       simon-zzm@163.com
# -------------------------------------------------------------------------------
import os
import pexpect
import time
import smtplib
from email.mime.text import MIMEText

#### base se
mysql_bin = '/program/mysql5/bin/mysql'
mysql_user = 'checkslavestatus'
mysql_pass = 'xxxxxxxxxx'
#设置错多少次开始告警
max_error = 3
mail_host = 'smtp.exmail.qq.com'
mail_user = 'warning@xxx.net'
mail_pwd = 'xxxxxxxxx'
mail_cc = "simon-zzm@163.com"
####

def mail_warn(error_ip):
    content = 'IP %s mysql slave is error!'%error_ip
    msg = MIMEText(content)
    msg['From'] = mail_user
    msg['Subject'] = 'mysql warnning %s'%error_ip
    msg['To'] = mail_to
    try:
        s = smtplib.SMTP()
        s.connect(mail_host)
        s.login(mail_user,mail_pwd)
        s.sendmail(mail_user,[mail_to],msg.as_string())
        s.close()
    except Exception ,e:
        print e

def main():
    error_context = ''
    #读取上次检查master同步点的记录
    try:
        f = open('MasterPos.txt', 'rb').read()
        try:
            old_master_pos = f.split(':')[0]
            error_count = f.split(':')[1]
        except:
            old_master_pos = 0
            error_count = 0
    except:
        old_master_pos = 0
        error_count = 0
        pass
    # 获得数据库同步状态
    status = os.popen("%s -u%s -p%s -e 'show slave status\G'"%
                      (mysql_bin,mysql_user,mysql_pass)).readlines()
    # 查看同步主节点数据
    for status_l in status:
        if status_l.find('Read_Master_Log_Pos: ') > 0:
            f = open('MasterPos.txt', 'wb')
            # 防止出现空值
            try:
                new_master_pos = int(status_l.split(': ')[1])
            except:
                new_master_pos = 0
            if int(new_master_pos) == int(old_master_pos) or int(old_master_pos):
                f.write('%s:%s' % (new_master_pos, int(error_count)+1))
            else:
                f.write('%s:0' % new_master_pos)
            f.close()
            if int(error_count)+1 > max_error:
                error_context += 'slave error!'
    # 判断是否报警
    print error_context:
    if len(error_context) > 1:
        ip = os.popen("/sbin/ifconfig|grep 'inet addr'|awk '{print $2}'").read()
        get_local_ip = ip[ip.find(':')+1:ip.find('n')]
        mail_warn("%s"%get_local_ip)

if __name__ == "__main__":
    main()

源代码

5月 08

raspberry pi上测试pypy效果

PyPy 2.0 alpha for ARM 发布

官方网页
http://pypy.org/download.html

有专门针对raspberry pi的版本。来简单做个测试。
我这竟还要翻墙下回来。
安装很顺利。
$sudo dpkg -i pypy-upstream_2.0~alpha+arm_armhf.deb

开始测试一下效果,直接到oschina拷贝“ruby太慢的”python版测试一下。

import time
start=time.time()
all=xrange(1,10**7)
def check(num):
    a=list(str(num))
    b=a[::-1]
    if a==b:
        return True
    return False
for i in all:
    if check(i):
        if check(i**2):
            print i,i**2
 
end=time.time()
print end-start

开始测试
直接用python测试
time python test.py
。。。

real 8m13.570s
user 8m3.490s
sys 0m0.560s

使用pypy测试
time pypy test.py
。。。

real 1m45.521s
user 1m44.230s
sys 0m0.150s

结论使用pypy处理运算速度比python高出差不多7倍的效果。

4月 18

监控mysql从机同步状态脚本

  mysql数据库主从运行。为了知道从机的同步情况,写了个脚本,放在crontab中,如果同步出错,则邮件报警。去年写的,放上来做个备忘。

#!/usr/local/bin/python
# -------------------------------------------------------------------------------
# Filename:    .py
# Revision:    1.0
# Date:        2012-03-20
# Author:      simonzhang
# Web:         www.simonzhang.net
# Email:       simon-zzm@163.com
# -------------------------------------------------------------------------------
import os
import pexpect
import time
import smtplib
from email.mime.text import MIMEText

#### base se
mysql_bin = '/mysql5/bin/mysql'
mysql_user = ''
mysql_pass = ''
mail_host = 'smtp.exmail.qq.com'
mail_user = 'XXX@XXX.net'
mail_pwd = 'XXXX'
mail_to = "xxxxx@xxx.com"
####

def mail_warn(error_ip):
    content = 'IP %s mysql slave is error!'%error_ip
    msg = MIMEText(content)
    msg['From'] = mail_user
    msg['Subject'] = 'mysql warnning %s'%error_ip
    msg['To'] = mail_to
    try:
        s = smtplib.SMTP()
        s.connect(mail_host)
        s.login(mail_user,mail_pwd)
        s.sendmail(mail_user,[mail_to],msg.as_string())
        s.close()
    except Exception ,e:
        print e

def main():
    status = os.popen("%s -u%s -p%s -e 'show slave status\G'"%
                      (mysql_bin,mysql_user,mysql_pass)).read() 
    io_status = status[status.find('Slave_IO_Running: ')+18]
    sql_status = status[status.find('Slave_SQL_Running: ')+19]
    if (io_status == 'Y') or (sql_status == 'Y'):
        ip = os.popen("/sbin/ifconfig|grep 'inet addr'|awk '{print $2}'").read()
        get_local_ip = ip[ip.find(':')+1:ip.find('n')]
        mail_warn("%s"%get_local_ip)

if __name__ == "__main__":
    main()
3月 27

不明IP登陆linux服务器时邮件通知

  当有人登陆服务器时希望能知道是那个IP在登陆,如果是黑客登陆了也能及时知道。
  处理方法是,python时时检查linux下 secure 文件,如果是登陆成功的就发邮件。然后用shell脚本调用py启停,然后配置crontab定时检查py是否运行,如果检查进行不在则进行启动。
  python 脚本

#!/bin/python
#-*- coding:utf-8 -*-
# Filename:
# Revision:    1.0
# Date:        2013-3-26
# Author:      simonzhang
# web:         www.simonzhang.net
# Email:       simon-zzm@163.com
### END INIT INFO
import re
import os
import time

import smtplib
from email.mime.text import MIMEText

#### 基础设置
mail_host = 'smtp.exmail.qq.com'
mail_user = 'warning'
mail_pwd = 'aaa'
mail_to = "simon-zzm@163.com"
mail_cc = "simon-zzm@"
secure_file = '/var/log/secure'
conn_fail_key = 'Failed password'
conn_access_key = 'Accepted password'
exclude_ip = ['192.168.100.8', \
              '192.168.100.10', \
              '192.168.100.11']
my_path = os.getcwd()


####
def mail_send(text):
    content = '%s' % text
    msg = MIMEText(content)
    msg['From'] = mail_user
    msg['Subject'] = 'access login server'
    msg['To'] = mail_to
    try:
        s = smtplib.SMTP()
        s.connect(mail_host)
        s.login(mail_user, mail_pwd)
        s.sendmail(mail_user, [mail_to, mail_cc], msg.as_string())
        s.close()
    except Exception, e:
        print e

####
# get local host ip
####
def get_ip_address():
    ip = os.popen("/sbin/ifconfig | grep 'inet addr' | awk '{print $2}'").read()
    ip = ip[ip.find(':')+1:ip.find('\n')]
    return ip

#### 
def parse_secure(_data):
    # 获取IP地址
    re_ip = re.compile(r'''\d{2,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}''',re.DOTALL)
    try:
        get_ip = re_ip.findall(_data)[0]
    except:
        get_ip = ''
    #### 获得关键字位置
    try:
        key_index = _data.index(conn_access_key)
    except:
        key_index = 0
    #### 将正常登陆但不在已知范围的IP获取
    if (get_ip not in exclude_ip) and (get_ip != '') and (key_index > 0):
        return get_ip
    else:
        return ''


def main():
    #### 获取上次分析的最后一行
    try:
        last_line = open('%s/lastline.txt' % my_path, 'rb').readlines()[0][:-1]
    except:
        last_line = ''
    #### 打开安全日志,并记录最后一行
    get_secure = open(secure_file, 'rb').readlines()
    write_line = open('%s/lastline.txt' % my_path, 'wb')
    write_line.write(get_secure[-1])
    write_line.close()
    #### 获取没有处理的数据,如果最后一行为空着处理全部数据。
    if last_line != "":
        last_id = get_secure.index("%s\n" % last_line)
        get_secure = get_secure[last_id + 1:]
    #### 开始处理数据,只处理登陆成功和登陆失败部分数据
    access_login_list = ''
    for _get in get_secure:
        re_get_ip = parse_secure(_get)
        if re_get_ip != '':
            access_login_list += "%s " % re_get_ip
    #### 判断是否需要报警
    if len(access_login_list) > 1:
        mail_send("%s access login %s server" % (access_login_list, get_ip_address())) 


if __name__ == '__main__':
    while True:
        main()
        time.sleep(3)

  shell 脚本

#! /bin/bash
#
# make simon-zzm@163.com
#
#
### END INIT INFO

# Source function library.
. /etc/profile
cd `pwd`
key=monitoring_secure.py
# See how we were called.
case "$1" in
  stop)
     /bin/ps -ef|grep "${key}"|grep -v grep |awk ' ''{print $2}'|xargs kill -9
     ;;
  start)
      /usr/local/bin/python ${key} &
      ;;
  restart)
      stop
      sleep 1
      /usr/local/bin/python ${key} &
      ;;
  check)
      process_count=`/bin/ps -ef|grep "${key}"|grep -v grep|wc -l`
      case "${process_count}" in
          0)
            $0 start
            ;;
          1)
            ;;
          *)
           $0 stop
           sleep 1
           $0 start
           ;;
      esac
      ;;
  status)
      /bin/ps -ef|grep "${key}"|grep -v grep 
      ;;
  *)
        echo $"Usage: $0 {stop|start|restart|check|status}"
        exit 1
esac

exit

  crontab的配置,每8个小时检查一次。
0 */8 * * * /bin/sh /program/script/check_login_user/monitoring_secure.sh check >/dev/null 2>&1
代码下载

3月 08

linux下查看使用硬盘IO过高的进程

  服务器cpu使用率不高,load比较高,所以要查看一下IO。硬盘IO可以通过命令vmstat或iostat获得(也可以用yum 安装dstat获得),网络IO可以用iftop命令获取。但是不知道那个进程使用硬盘IO比较高,通过查找没有找到相关命令,只好自己写个脚本进行统计处理。
  本脚本在CentOS6下(kernel2.6以上)python2.6测试通过。
  直接运行脚本,默认情况下收集3秒钟数据,显示读写最高的前三个进程。如用参数可以使用命令“python fhip.py 4 5 3”,第一个数位每次收集读写数据的间隔秒数,第二个数是打印出读写最多的n个进程,第三个为运行脚本的次数。因为参数部分写的比较简单那,所以用参数必须3个全写。

#!/bin/python 
#-*- coding:utf-8 -*- 
# Filename:    find_high_io_process
# Revision:    1.0 
# Date:        2013-3-8 
# Author:      simonzhang 
# web:         www.simonzhang.net 
# Email:       simon-zzm@163.com 
### END INIT INFO
import os
import re
import sys
import time
from string import strip

####
sys_proc_path = '/proc/'
re_find_process_number = '^\d+$'

####
# 通过/proc/$pid/io获取读写信息
####
def collect_info():
    _tmp = {}
    re_find_process_dir = re.compile(re_find_process_number)
    for i in os.listdir(sys_proc_path):
        if re_find_process_dir.search(i):
            # 获得进程名
            process_name = open("%s%s/stat" % (sys_proc_path, i), "rb").read().split(" ")[1]
            # 读取io信息
            rw_io = open("%s%s/io" % (sys_proc_path, i), "rb").readlines()
            for _info in rw_io:
                cut_info = strip(_info).split(':')
                if strip(cut_info[0]) == "read_bytes":
                    read_io = int(strip(cut_info[1]))
                if strip(cut_info[0]) == "write_bytes":
                    write_io = int(strip(cut_info[1]))
            _tmp[i] = {"name":process_name, "read_bytes":read_io, "write_bytes":write_io}
    return _tmp


def main(_sleep_time, _list_num):
    _sort_read_dict = {}
    _sort_write_dict = {}
    # 获取系统读写数据
    process_info_list_frist = collect_info()
    time.sleep(_sleep_time)
    process_info_list_second = collect_info()
    # 将读数据和写数据进行分组,写入两个字典中
    for loop in process_info_list_second.keys():
        second_read_v = process_info_list_second[loop]["read_bytes"]
        second_write_v = process_info_list_second[loop]["write_bytes"]
        try:
            frist_read_v = process_info_list_frist[loop]["read_bytes"]
        except:
            frist_read_v = 0
        try:
            frist_write_v = process_info_list_frist[loop]["write_bytes"]
        except:
            frist_write_v = 0
        # 计算第二次获得数据域第一次获得数据的差
        _sort_read_dict[loop] = second_read_v - frist_read_v
        _sort_write_dict[loop] = second_write_v - frist_write_v
    # 将读写数据进行排序
    sort_read_dict = sorted(_sort_read_dict.items(),key=lambda _sort_read_dict:_sort_read_dict[1],reverse=True)
    sort_write_dict = sorted(_sort_write_dict.items(),key=lambda _sort_write_dict:_sort_write_dict[1],reverse=True)
    # 打印统计结果
    print "pid     process     read(bytes) pid     process     write(btyes)"
    for _num in range(_list_num):
        read_pid = sort_read_dict[_num][0]
        write_pid = sort_write_dict[_num][0]
        res = "%s" % read_pid
        res += " " * (8 - len(read_pid)) + process_info_list_second[read_pid]["name"]
        res += " " * (12 - len(process_info_list_second[read_pid]["name"])) + "%s" % sort_read_dict[_num][1]
        res += " " * (12 - len("%s" % sort_read_dict[_num][1])) + write_pid
        res += " " * (8 - len(write_pid)) + process_info_list_second[write_pid]["name"]
        res += " " * (12 - len("%s" % process_info_list_second[write_pid]["name"])) + "%s" % sort_write_dict[_num][1]
        print res
    print "\n" * 1


if __name__ == '__main__':
    try:
        _sleep_time = sys.argv[1]
    except:
        _sleep_time = 3
    try:
        _num = sys.argv[2]
    except:
        _num = 3
    try:
        loop = sys.argv[3]
    except:
        loop = 1
    for i in range(int(loop)):
        main(int(_sleep_time), int(_num))

linux查找IO高的进程的源码