3月 27

不明IP登陆linux服务器时邮件通知

Posted on 2013 年 3 月 27 日 by 张子萌

　　当有人登陆服务器时希望能知道是那个IP在登陆，如果是黑客登陆了也能及时知道。
　　处理方法是，python时时检查linux下 secure 文件，如果是登陆成功的就发邮件。然后用shell脚本调用py启停，然后配置crontab定时检查py是否运行，如果检查进行不在则进行启动。
　　python 脚本

#!/bin/python
#-*- coding:utf-8 -*-
# Filename:
# Revision:    1.0
# Date:        2013-3-26
# Author:      simonzhang
# web:         www.simonzhang.net
# Email:       simon-zzm@163.com
### END INIT INFO
import re
import os
import time

import smtplib
from email.mime.text import MIMEText

#### 基础设置
mail_host = 'smtp.exmail.qq.com'
mail_user = 'warning'
mail_pwd = 'aaa'
mail_to = "simon-zzm@163.com"
mail_cc = "simon-zzm@"
secure_file = '/var/log/secure'
conn_fail_key = 'Failed password'
conn_access_key = 'Accepted password'
exclude_ip = ['192.168.100.8', \
              '192.168.100.10', \
              '192.168.100.11']
my_path = os.getcwd()


####
def mail_send(text):
    content = '%s' % text
    msg = MIMEText(content)
    msg['From'] = mail_user
    msg['Subject'] = 'access login server'
    msg['To'] = mail_to
    try:
        s = smtplib.SMTP()
        s.connect(mail_host)
        s.login(mail_user, mail_pwd)
        s.sendmail(mail_user, [mail_to, mail_cc], msg.as_string())
        s.close()
    except Exception, e:
        print e

####
# get local host ip
####
def get_ip_address():
    ip = os.popen("/sbin/ifconfig | grep 'inet addr' | awk '{print $2}'").read()
    ip = ip[ip.find(':')+1:ip.find('\n')]
    return ip

#### 
def parse_secure(_data):
    # 获取IP地址
    re_ip = re.compile(r'''\d{2,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}''',re.DOTALL)
    try:
        get_ip = re_ip.findall(_data)[0]
    except:
        get_ip = ''
    #### 获得关键字位置
    try:
        key_index = _data.index(conn_access_key)
    except:
        key_index = 0
    #### 将正常登陆但不在已知范围的IP获取
    if (get_ip not in exclude_ip) and (get_ip != '') and (key_index > 0):
        return get_ip
    else:
        return ''


def main():
    #### 获取上次分析的最后一行
    try:
        last_line = open('%s/lastline.txt' % my_path, 'rb').readlines()[0][:-1]
    except:
        last_line = ''
    #### 打开安全日志，并记录最后一行
    get_secure = open(secure_file, 'rb').readlines()
    write_line = open('%s/lastline.txt' % my_path, 'wb')
    write_line.write(get_secure[-1])
    write_line.close()
    #### 获取没有处理的数据，如果最后一行为空着处理全部数据。
    if last_line != "":
        last_id = get_secure.index("%s\n" % last_line)
        get_secure = get_secure[last_id + 1:]
    #### 开始处理数据,只处理登陆成功和登陆失败部分数据
    access_login_list = ''
    for _get in get_secure:
        re_get_ip = parse_secure(_get)
        if re_get_ip != '':
            access_login_list += "%s " % re_get_ip
    #### 判断是否需要报警
    if len(access_login_list) > 1:
        mail_send("%s access login %s server" % (access_login_list, get_ip_address())) 


if __name__ == '__main__':
    while True:
        main()
        time.sleep(3)

　　shell 脚本

#! /bin/bash
#
# make simon-zzm@163.com
#
#
### END INIT INFO

# Source function library.
. /etc/profile
cd `pwd`
key=monitoring_secure.py
# See how we were called.
case "$1" in
  stop)
     /bin/ps -ef|grep "${key}"|grep -v grep |awk ' ''{print $2}'|xargs kill -9
     ;;
  start)
      /usr/local/bin/python ${key} &
      ;;
  restart)
      stop
      sleep 1
      /usr/local/bin/python ${key} &
      ;;
  check)
      process_count=`/bin/ps -ef|grep "${key}"|grep -v grep|wc -l`
      case "${process_count}" in
          0)
            $0 start
            ;;
          1)
            ;;
          *)
           $0 stop
           sleep 1
           $0 start
           ;;
      esac
      ;;
  status)
      /bin/ps -ef|grep "${key}"|grep -v grep 
      ;;
  *)
        echo $"Usage: $0 {stop|start|restart|check|status}"
        exit 1
esac

exit

　　crontab的配置，每8个小时检查一次。
0 */8 * * * /bin/sh /program/script/check_login_user/monitoring_secure.sh check >/dev/null 2>&1
代码下载

3月 08

linux下查看使用硬盘IO过高的进程

Posted on 2013 年 3 月 8 日 by 张子萌

　　服务器cpu使用率不高，load比较高，所以要查看一下IO。硬盘IO可以通过命令vmstat或iostat获得（也可以用yum 安装dstat获得），网络IO可以用iftop命令获取。但是不知道那个进程使用硬盘IO比较高，通过查找没有找到相关命令，只好自己写个脚本进行统计处理。
　　本脚本在CentOS6下（kernel2.6以上）python2.6测试通过。
　　直接运行脚本，默认情况下收集3秒钟数据，显示读写最高的前三个进程。如用参数可以使用命令“python fhip.py 4 5 3”，第一个数位每次收集读写数据的间隔秒数，第二个数是打印出读写最多的n个进程，第三个为运行脚本的次数。因为参数部分写的比较简单那，所以用参数必须3个全写。

#!/bin/python 
#-*- coding:utf-8 -*- 
# Filename:    find_high_io_process
# Revision:    1.0 
# Date:        2013-3-8 
# Author:      simonzhang 
# web:         www.simonzhang.net 
# Email:       simon-zzm@163.com 
### END INIT INFO
import os
import re
import sys
import time
from string import strip

####
sys_proc_path = '/proc/'
re_find_process_number = '^\d+$'

####
# 通过/proc/$pid/io获取读写信息
####
def collect_info():
    _tmp = {}
    re_find_process_dir = re.compile(re_find_process_number)
    for i in os.listdir(sys_proc_path):
        if re_find_process_dir.search(i):
            # 获得进程名
            process_name = open("%s%s/stat" % (sys_proc_path, i), "rb").read().split(" ")[1]
            # 读取io信息
            rw_io = open("%s%s/io" % (sys_proc_path, i), "rb").readlines()
            for _info in rw_io:
                cut_info = strip(_info).split(':')
                if strip(cut_info[0]) == "read_bytes":
                    read_io = int(strip(cut_info[1]))
                if strip(cut_info[0]) == "write_bytes":
                    write_io = int(strip(cut_info[1]))
            _tmp[i] = {"name":process_name, "read_bytes":read_io, "write_bytes":write_io}
    return _tmp


def main(_sleep_time, _list_num):
    _sort_read_dict = {}
    _sort_write_dict = {}
    # 获取系统读写数据
    process_info_list_frist = collect_info()
    time.sleep(_sleep_time)
    process_info_list_second = collect_info()
    # 将读数据和写数据进行分组，写入两个字典中
    for loop in process_info_list_second.keys():
        second_read_v = process_info_list_second[loop]["read_bytes"]
        second_write_v = process_info_list_second[loop]["write_bytes"]
        try:
            frist_read_v = process_info_list_frist[loop]["read_bytes"]
        except:
            frist_read_v = 0
        try:
            frist_write_v = process_info_list_frist[loop]["write_bytes"]
        except:
            frist_write_v = 0
        # 计算第二次获得数据域第一次获得数据的差
        _sort_read_dict[loop] = second_read_v - frist_read_v
        _sort_write_dict[loop] = second_write_v - frist_write_v
    # 将读写数据进行排序
    sort_read_dict = sorted(_sort_read_dict.items(),key=lambda _sort_read_dict:_sort_read_dict[1],reverse=True)
    sort_write_dict = sorted(_sort_write_dict.items(),key=lambda _sort_write_dict:_sort_write_dict[1],reverse=True)
    # 打印统计结果
    print "pid     process     read(bytes) pid     process     write(btyes)"
    for _num in range(_list_num):
        read_pid = sort_read_dict[_num][0]
        write_pid = sort_write_dict[_num][0]
        res = "%s" % read_pid
        res += " " * (8 - len(read_pid)) + process_info_list_second[read_pid]["name"]
        res += " " * (12 - len(process_info_list_second[read_pid]["name"])) + "%s" % sort_read_dict[_num][1]
        res += " " * (12 - len("%s" % sort_read_dict[_num][1])) + write_pid
        res += " " * (8 - len(write_pid)) + process_info_list_second[write_pid]["name"]
        res += " " * (12 - len("%s" % process_info_list_second[write_pid]["name"])) + "%s" % sort_write_dict[_num][1]
        print res
    print "\n" * 1


if __name__ == '__main__':
    try:
        _sleep_time = sys.argv[1]
    except:
        _sleep_time = 3
    try:
        _num = sys.argv[2]
    except:
        _num = 3
    try:
        loop = sys.argv[3]
    except:
        loop = 1
    for i in range(int(loop)):
        main(int(_sleep_time), int(_num))

linux查找IO高的进程的源码

2月 21

服务器自动封锁和解封IP 对付攻击

Posted on 2013 年 2 月 21 日 by 张子萌

　　服务器流量暴涨，首先通过访问日志查看。每个IP并发不是很高，分布也很广，但是都是只访问主页，可见不是抓取服务导致。通过“netstat -nat|awk ‘{print awk $NF}’|sort|uniq -c|sort -n”t命令查看，连接中还有很多SYN_RECV、TIME_WAIT、ESTABLISHED。看来是小型的攻击。首先是修改web服务的配置，缩减了timeout时间，在nginx上禁止一部分并发比较高的IP，小改系统参数。流量降了20M，但还是太高。nginx虽然禁止了IP，如果用浏览看是一个空白页面，但是每个连接还会产生很小的流量。最后决定将最小的返回值也封掉，直接在iptables上drop掉连接。每个超限IP暂定封2天。写个脚本自动运行。

　　此脚本在在centos5.3+python2.6.6使用通过。需要注意，此脚本统计日志完毕会将nginx日志文件清空，如需保留日志，请自行修改。每次加载iptables规则时会清楚上次所有规则，如果有使用其它规则也要自行处理。

#!/bin/python
#-*- coding:utf-8 -*-
# Filename:    drop_ip_iptables.py
# Revision:    1.0
# Date:        2013-2-21
# Author:      simonzhang
# web:         www.simonzhang.net
# Email:       simon-zzm@163.com
### END INIT INFO
import os
import time
from string import strip


#### 参数和脚本中使用到的系统命令
nginx_log = "/usr/local/nginx/logs/nginx.access.log"
# 统计nginx日志中IP访问数量
check_comm = "/bin/cat %s |awk ' ''{print $1}'|sort |uniq -c|sort -n -k1 -r" % nginx_log
# 放在crontab中10分钟跑一次，访问超出n次的全部封掉
overproof = 3000
# 被封地址记录文件
# 文件中记录封IP时间和IP地址。时间单位为秒
lock_ip_list = "/usr/local/nginx/logs/lock_ip_list.txt"
# 被封地址解开时间。时间单位为秒
unlock_time = 3600*24*2 


def manage_lock_ip():
    # 获取当前时间
    get_now_time = int(time.time())
    # 管理日志字典
    man_ip = {}
    # 处理日志中的IP
    try:
        log_file = open('%s' % lock_ip_list, 'rb').readlines()
        for get_ip_info in log_file:
            _get_ip_info = strip(get_ip_info).split(' ')
            man_ip['%s' % _get_ip_info[1]] = int(_get_ip_info[0])
    except:
        exit(0)
    # 清空iptable列表,和ip记录日志
    os.popen('/sbin/iptables -F')
    clean_file = open('%s' % lock_ip_list, 'rb+')
    clean_file.truncate()
    # 开始处理IP,被封没有超时的IP写入iptables和日志中
    log_file = open('%s' % lock_ip_list, 'ab')
    for loop_ip in man_ip.keys():
        if (get_now_time - man_ip[loop_ip]) < unlock_time:
            os.popen('/sbin/iptables -I INPUT -s %s -j DROP' % loop_ip)
            log_file.write('%s %s\n' % (man_ip[loop_ip], loop_ip))
    log_file.close()
        


def main():
    # 已封IP地址字典
    drop_ip_list = {}
    # 加载已封IP日志
    try:
        log_file = open('%s' % lock_ip_list, 'rb').readlines()
        for get_drop_ip_info in log_file:
            _get_drop_ip_info = strip(get_drop_ip_info).split(' ')
            drop_ip_list['%s' % _get_drop_ip_info[1]] = int(_get_drop_ip_info[0])
    except:
        os.mknod('%s' % lock_ip_list)
    # 获取nginx日志中的访问超高的ip并写入日志
    access_high_ip = os.popen('%s' % check_comm).readlines()
    for get_ip_count in access_high_ip:
        try :
            _get_ip_count = strip(get_ip_count).split(' ')
            _get_ip = _get_ip_count[1]
            _get_count = _get_ip_count[0]
        except:
            pass
        if (int(_get_count) > int(overproof)) and (_get_ip not in drop_ip_list.keys()):
            now_time = int(time.time())
            log_file = open('%s' % lock_ip_list, 'ab+')
            log_file.write('%s %s\n' % (now_time, _get_ip))
            log_file.close()
    # 统计完毕清空nginx日志
    log_file = open('%s' % nginx_log, 'wb')
    log_file.truncate()
    # 处理要封的IP和要解开的IP
    manage_lock_ip()


if __name__ == '__main__':
    main()

再补充两条日志分析命令
# 单位时间内统计单个IP只访问首页的数量：
#check_comm = “/bin/cat %s|grep ‘GET / HTTP/1.1’|awk ‘ ”{print $1}’|sort |uniq -c|sort -n -k1 -r” % nginx_log
# 单位时间内统计单个IP访问相同页面的数量
#check_comm = “/bin/cat %s|awk -F'”‘ ‘{print $1 $2}’|awk ‘ ”{print $1″ “$6” “$7” “$8}’|sort -k2,4 -r|uniq -c|sort -n -k1 -r” % nginx_log

源码下载
drop_ip_tables

1月 10

我的用 raspberry pi 手工制作的小车（一）

Posted on 2013 年 1 月 10 日 by 张子萌

　　首先切割了一块，20x20cm的2mm厚的亚克力板。也许是手磨速度问题，切割时比较粘刀，所以切得很不整齐。

　　我用边角料给超声波测距模块做了个架子。

　　底层方的东西还不是很多。包含TT电机和H桥驱动，步进电机驱动板，红外测距模块和接线板。

我的raspberry pi小车下层。包含TT电机和H桥驱动，步进电机驱动板，红外测距模块。

　　上层东西也不多，raspberry pi的板子占了一大块。剩下的摄像头，步进电机和超声波测距模块。

我的raspberry pi 小车的上层。包含步进电机，和raspberry pi的主板。

　　来张正面照。摄像头拍的照片可以通过浏览器访问。

我的raspberry pi小车的正面照。

　　当前程序已经测试完成，在桌子上跑问题也不大。但是存在一个问题。使用电池驱动时，程序一启动系统就重启了，应该是电压或电流不稳定。正在改进中。
　　当前费用统计。

成本统计
产品名　　　　　　　　　　　　　数量　　　　单价（元）　　　总价（元）
raspberry pi 2.0　　　　　　1　　　　　　341　　　　　　　341
2mm厚200x200mm 亚克力板　　　1　　　　　　7 　　　　　　　7
HC-SR04超声波测距模块　　　　1　　　　　　7.3　　　　　　　7.3
1/16 5V 4相 5线步进电机　　　1　　　　　　5.8　　　　　　　5.8
五线四相步进电机驱动板　　　　1　　　　　　3.2　　　　　　　3.2
万向轮尼龙轮子　　　　　　　　1　　　　　4　　　　　　　　　4
TT马达+车轮套装　　　　　　　　2　　　　　8.1　　　　　　　16.2
usb接口　　　　　　　　　　　　1　　　　　1　　　　　　　　1
SYB-170 面包板　　　　　　　　1　　　　　2.8　　　　　　　2.8
两路H桥IO口电机驱动板　　　　　1　　　　　8　　　　　　　　8
红外壁障模块　　　　　　　　　　2　　　　　8　　　　　　　　16
摄像头　　　　　　　　　　　　　1　　　　利旧设备　 0
总计：412.3

小配件大约数量
螺丝部分均为M3
6mm铜柱　　　12个
20mm铜柱　　4个
15mm铜柱　　4个
螺丝1mm　　　12个
螺丝10mm　　2个
螺母　　　　20个

线才大约数量
双母头杜邦线20CM　　20根
双公头杜邦线20CM　　10根
公母头杜邦线20CM　　10根
排插端子　　　　　　1组

小配件加线材大于25元

1月 08

python 连接hbase存、取图片

Posted on 2013 年 1 月 8 日 by 张子萌

　　连接hbase1.0.4需要使用Thrift，我用的是python2.6。
　　安装thrift。下载地址https://dist.apache.org/repos/dist/release/thrift/0.9.0/thrift-0.9.0.tar.gz解压后安装命令。
在hbase服务器上，确保hbase服务已经启动。在thrift目录中，用管理员运行一下命令安装。
./configure
make
make install

　　安装完毕生成hbase的client代码命令格式如下，
thrift –gen
登陆到hbase的权限进入
$ cd hbase/src/main/resources/org/apache/hadoop/hbase/thrift

生成python的
$ thrift –gen py Hbase.thrift
再生成一个C的学习备用，与本文无关
$ thrift –gen c_glib Hbase.thrift

将gen-py文件夹下的hbase文件夹拷贝到要连接hbase的服务器的python目录下，我用的是python2.6，自己手动安装的。命令如下
cp -R hbase /usr/local/lib/python2.6/site-packages/

拷贝完毕用import导入 hbase成功。开始写代码了。参考hbase里的例子在hbase/src/examples/中。

　　我的任务就是把某个目录下，以jpg结尾的图片放到hbase里，因为图片名没有重复，所以用图片名做row name。hbase手动建表’hbase(main):013:0> create ‘img’, ‘data:”。

　　首先统计一下照片的数量。这个image目录下只有jpg的图片，使用匹配只是备将来使用。下面只是测试脚本，不关心业务逻辑。

# find /image/ -name \*.jpg -type f |wc -l
13140

# du -s -h /image
303M /image/

　　本地共有13140张照片共303M，写入hbase测试脚本如下：

#!/bin/bash 
# -------------------------------
# Revision:
# Date:        2012-12-11 
# Author:      simonzhang 
# Email:       simon-zzm@163.com 
# Web:         www.simonzhang.net 
# -------------------------------

import os
import re

from thrift.transport import TSocket  
from thrift.transport import TTransport  
from thrift.protocol import TBinaryProtocol  
   
from hbase import Hbase  
from hbase.ttypes import *

#### base set
find_path=(r'/image/',
           )

class HbaseWrite():
    def __init__(self):
        self.tableName = 'img'
        self.transport = TSocket.TSocket('192.168.100.100', 9090)
        self.transport = TTransport.TBufferedTransport(self.transport)
        self.transport.open()
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)

    def createTable(self, tableName):
        col1 = ColumnDescriptor(name="data:",maxVersions=1)
        self.client.createTable(tableName,[col1])

    def write(self, PicPath, PicName):
        row = PicName.split('.')[0]
        _data = PicName.split('.')[1]
        PicData = open('%s/%s' % (PicPath, PicName), 'rb').read()
        # 此处需要注意格式，网上的格式报错，少个参数报错如下
        # TypeError: mutateRow() takes exactly 5 arguments (4 given)
        self.client.mutateRow(self.tableName, row, [Mutation(column="data:%s" % _data, value=PicData)], {})

    def read(self, tableName, PicName):
        row = PicName.split('.')[0]
        data_type = PicName.split('.')[1]
        get_data = self.client.get(tableName, row, 'data:%s' % data_type, {})[0]
        if get_data:
            return get_data.value
        else:
            return "Error"


def main(_path):
    WHB = HbaseWrite()
    WHB.createTable()
    find_file=re.compile(r"^[0-9a-z]*.jpg$")
    find_walk=os.walk(_path)
    for path,dirs,files in find_walk:
        for f in files:
            if find_file.search(f):
                path_name=path
                file_name=f
                WHB.write(path_name, file_name)


if __name__ == "__main__":
    for get_path in find_path:
        main(get_path)

开始测试脚本
# time python hbase_test.py

real 1m15.471s
user 0m4.881s
sys 0m2.867s

到hbase里查看写入的数量，证明已经完全写入。
hbase(main):001:0> count ‘img’
:
:
:
13140 row(s) in 10.2780 seconds

2013-5-16. 因为对hadoop理解不足。以下写的有问题，提醒大家注意。

hbase使用hadoop进行存储，查看hadoop的磁盘使用量。
26K namenode1/
298M u01/

　　我的内存给namenode可以使用25G。根据以上数据计算结果如下：
((25*1000*1000）/26)*298= 286538461M = 286538G = 286 T

　　如果每台服务器有三块1T存储硬盘，此集群可以有95台服务器。共存储此类照片大约为12634615360张。内网测试，写入速度3.9M。

　　注：有一点需要注意，写入的数据删除后磁盘空间也不会释放，原理应该改和mongodb一样，但是没有仔细查看。

Page 17 of 26« First ‹ Previous 14 15 161718 19 20 Next ›Last »

simonzhang的家

有朋自远方来。。。。。

Category Archives: python备忘

不明IP登陆linux服务器时邮件通知

linux下查看使用硬盘IO过高的进程

服务器自动封锁和解封IP 对付攻击

我的用 raspberry pi 手工制作的小车（一）

python 连接hbase存、取图片

2025年九月
一	二	三	四	五	六	日
« 1月
1	2	3	4	5	6	7
8	9	10	11	12	13	14
15	16	17	18	19	20	21
22	23	24	25	26	27	28
29	30