3月 08

linux下查看使用硬盘IO过高的进程

Posted on 2013 年 3 月 8 日 by 张子萌

　　服务器cpu使用率不高，load比较高，所以要查看一下IO。硬盘IO可以通过命令vmstat或iostat获得（也可以用yum 安装dstat获得），网络IO可以用iftop命令获取。但是不知道哪个进程使用硬盘IO比较高，通过查找没有找到相关命令，只好自己写个脚本进行统计处理。
　　本脚本在CentOS6下（kernel2.6以上）python2.6测试通过。
　　直接运行脚本，默认情况下收集3秒钟数据，显示读写最高的前三个进程。如用参数可以使用命令“python fhip.py 4 5 3”，第一个数为每次收集读写数据的间隔秒数，第二个数是打印出读写最多的n个进程，第三个为运行脚本的次数。因为参数部分写得比较简单，所以用参数必须3个全写。

#!/bin/python 
#-*- coding:utf-8 -*- 
# Filename:    find_high_io_process
# Revision:    1.0 
# Date:        2013-3-8 
# Author:      simonzhang 
# web:         www.simonzhang.net 
# Email:       simon-zzm@163.com 
### END INIT INFO
import os
import re
import sys
import time
from string import strip

####
# Root of the proc filesystem; must end with a slash for callers that build
# paths with plain string formatting.
sys_proc_path = '/proc/'
# Per-process directories under /proc are named by their numeric pid.
# Raw string so that "\d" is a regex escape, not a string escape.
re_find_process_number = r'^\d+$'

####
# 通过/proc/$pid/io获取读写信息
####
def collect_info(proc_path=None, pid_pattern=None):
    """Snapshot the cumulative disk I/O counters of every readable process.

    Args:
        proc_path: Root directory of the proc filesystem. Defaults to the
            module-level ``sys_proc_path``.
        pid_pattern: Regular expression that selects the per-process
            directory names. Defaults to the module-level
            ``re_find_process_number``.

    Returns:
        A dict mapping each pid (the directory name, as a string) to
        ``{"name": ..., "read_bytes": int, "write_bytes": int}``. ``name`` is
        the command field of the ``stat`` file with its parentheses kept.
        Processes whose ``stat`` or ``io`` file cannot be read are left out.
    """
    if proc_path is None:
        proc_path = sys_proc_path
    if pid_pattern is None:
        pid_pattern = re_find_process_number
    is_pid_dir = re.compile(pid_pattern).search

    snapshot = {}
    for pid in os.listdir(proc_path):
        if not is_pid_dir(pid):
            continue
        try:
            with open(os.path.join(proc_path, pid, "stat"), "r") as stat_file:
                stat_text = stat_file.read()
            with open(os.path.join(proc_path, pid, "io"), "r") as io_file:
                io_lines = io_file.readlines()
        except (IOError, OSError):
            # The process exited between listdir() and open(), or its files
            # are not readable by this user: skip it instead of aborting.
            continue

        # The command name is wrapped in parentheses and may contain spaces,
        # so take everything from the first "(" to the last ")".
        name_start = stat_text.find("(")
        name_end = stat_text.rfind(")")
        if 0 <= name_start < name_end:
            process_name = stat_text[name_start:name_end + 1]
        else:
            fields = stat_text.split(" ")
            process_name = fields[1] if len(fields) > 1 else ""

        # Start from zero so a missing field can never reuse the value read
        # for the previous pid.
        counters = {"read_bytes": 0, "write_bytes": 0}
        for line in io_lines:
            key, _, value = line.partition(":")
            key = key.strip()
            if key in counters:
                counters[key] = int(value.strip())

        snapshot[pid] = {"name": process_name,
                         "read_bytes": counters["read_bytes"],
                         "write_bytes": counters["write_bytes"]}
    return snapshot


def main(_sleep_time, _list_num):
    """Print the processes that read and wrote the most during one interval.

    Takes two snapshots with ``collect_info()``, ``_sleep_time`` seconds
    apart, and prints a table of the differences.

    Args:
        _sleep_time: Seconds to wait between the two snapshots.
        _list_num: Maximum number of rows to print. Fewer rows are printed
            when fewer processes were sampled.
    """
    first_sample = collect_info()
    time.sleep(_sleep_time)
    second_sample = collect_info()

    # Difference between the second and the first snapshot, per pid.
    read_delta = {}
    write_delta = {}
    for pid, info in second_sample.items():
        # A process that appeared during the interval has no baseline, so
        # its whole counter is attributed to this interval.
        baseline = first_sample.get(pid, {})
        read_delta[pid] = info["read_bytes"] - baseline.get("read_bytes", 0)
        write_delta[pid] = info["write_bytes"] - baseline.get("write_bytes", 0)

    # Largest difference first.
    top_readers = sorted(read_delta.items(), key=lambda item: item[1], reverse=True)
    top_writers = sorted(write_delta.items(), key=lambda item: item[1], reverse=True)

    print("pid     process     read(bytes) pid     process     write(bytes)")
    # Both rankings hold the same pids, so one bound covers both lists.
    for rank in range(min(_list_num, len(top_readers))):
        read_pid, read_bytes = top_readers[rank]
        write_pid, write_bytes = top_writers[rank]
        columns = [
            read_pid.ljust(8),
            second_sample[read_pid]["name"].ljust(12),
            str(read_bytes).ljust(12),
            write_pid.ljust(8),
            second_sample[write_pid]["name"].ljust(12),
            str(write_bytes),
        ]
        print("".join(columns))
    print("\n" * 1)


if __name__ == '__main__':
    # Usage: python fhip.py [interval_seconds [top_n [repeat_count]]]
    # Arguments are positional; omitted ones fall back to 3, 3 and 1.
    # Converting up front reports a non-numeric argument before any sampling.
    _cli_args = sys.argv[1:4]
    _sleep_time = int(_cli_args[0]) if len(_cli_args) > 0 else 3
    _num = int(_cli_args[1]) if len(_cli_args) > 1 else 3
    loop = int(_cli_args[2]) if len(_cli_args) > 2 else 1
    for i in range(loop):
        main(_sleep_time, _num)

linux查找IO高的进程的源码

12月 04

python 源码删除注释并编译成字节码

Posted on 2012 年 12 月 4 日 by 张子萌

　　上线需要，将py的源码中注释删掉，然后编译成字节码，这样加载速度会比较快。写此脚本主要是为了删除注释。当然如果上线不想放py源码，则在最后增加删除源码即可。我把这个代码起名为咕噜咕噜。python 源码删除注释并编译。

#!/bin/env python
# -*- coding:utf-8 -*-
# -------------------------------
# Filename:    
# Revision:
# Date:        2012-12-3
# Author:      simonzhang
# Email:       simon-zzm@163.com
# Web:         www.simonzhang.net 
# -------------------------------
import os
import re
import sys
import shutil
import compileall


def delete_Notes(py_file, header_lines=11):
    """Remove comment lines and ''' blocks from a Python file, in place.

    This is a line-based heuristic, not a parser. After the first
    ``header_lines`` lines it drops:

    * lines whose first non-blank character is ``#``;
    * blocks opened by a line that starts with three single quotes, up to
      and including the line that closes them. A block that opens and
      closes on the same line is dropped without affecting later lines.

    Trailing comments after code and blocks written with three double
    quotes are left untouched.

    Args:
        py_file: Path of the file to rewrite.
        header_lines: Number of leading lines copied verbatim so the
            shebang, coding line and file header survive. The default of 11
            matches the previous hard-coded behaviour.
    """
    triple_quote = b"'''"
    swap_path = "%s.swp" % py_file

    with open(py_file, "rb") as source:
        source_lines = source.readlines()

    kept_lines = []
    inside_block = False
    for line_number, line in enumerate(source_lines):
        if line_number < header_lines:
            kept_lines.append(line)
            continue
        stripped = line.strip()
        # An odd number of delimiters on a line opens or closes a block;
        # an even number means it opened and closed on the same line.
        odd_delimiters = stripped.count(triple_quote) % 2 == 1
        if inside_block:
            if odd_delimiters:
                inside_block = False
            continue
        if stripped.startswith(b"#"):
            continue
        if stripped.startswith(triple_quote):
            inside_block = odd_delimiters
            continue
        kept_lines.append(line)

    # "wb" rather than append, so a swap file left over from an interrupted
    # run cannot leak into the result.
    with open(swap_path, "wb") as swap_file:
        swap_file.writelines(kept_lines)
    # Carry the permission bits over before replacing the original.
    shutil.copymode(py_file, swap_path)
    shutil.move(swap_path, py_file)
        

def main():
    """Copy a source tree, strip comments from the copy and byte-compile it.

    Reads two command-line arguments: the source directory and the target
    directory. The source tree is never modified. Every ``.py`` file in the
    copy is passed to ``delete_Notes`` and the copy is then compiled with
    ``compileall.compile_dir``.
    """
    if len(sys.argv) < 3:
        print("Usage: gulugulu.py <source_path> <target_path>")
        return
    _get_src_path = sys.argv[1]
    _get_dec_path = sys.argv[2]
    if not os.path.exists(_get_src_path):
        print("Path Error!")
        return
    if os.path.exists(_get_dec_path):
        # shutil.copytree requires that the destination does not exist yet;
        # report it instead of failing with a traceback.
        print("Target path already exists!")
        return
    # Work on a copy so the original sources stay intact.
    shutil.copytree(_get_src_path, _get_dec_path)
    # Strip comments from every Python file in the copy.
    for path, dirs, files in os.walk(_get_dec_path):
        for f in files:
            if f.endswith(".py"):
                delete_Notes(os.path.join(path, f))
    # Compile the stripped copy to bytecode.
    compileall.compile_dir(_get_dec_path)


if __name__ == "__main__":
    main()

使用方法，
gulugulu.py 源码路径 目标路径

python 源码删除注释并编译

6月 18

tornado学习笔记（二）

Posted on 2012 年 6 月 18 日 by 张子萌

主要模块

web – FriendFeed 使用的基础 Web 框架，包含了 Tornado 的大多数重要的功能
escape – XHTML, JSON, URL 的编码/解码方法
database – 对 MySQLdb 的简单封装，使其更容易使用
template – 基于 Python 的 web 模板系统
httpclient – 非阻塞式 HTTP 客户端，它被设计用来和 web 及 httpserver 协同工作
auth – 第三方认证的实现（包括 Google OpenID/OAuth、Facebook Platform、Yahoo BBAuth、FriendFeed OpenID/OAuth、Twitter OAuth）
locale – 针对本地化和翻译的支持
options – 命令行和配置文件解析工具，针对服务器环境做了优化

底层模块

httpserver – 服务于 web 模块的一个非常简单的 HTTP 服务器的实现
iostream – 对非阻塞式的 socket 的简单封装，以方便常用读写操作
ioloop – 核心的 I/O 循环

　　首先建立一个mysql数据库，库名为test_tornado，再建立一个用户表，表中包含用户名和密码，脚本如下。

CREATE TABLE `user` (
  `id` int(100) NOT NULL DEFAULT '0',
  `user` varchar(20) DEFAULT NULL,
  `passwd` varchar(50) DEFAULT NULL,
  PRIMARY KEY (`id`)
);


INSERT INTO `user` VALUES ('0', 'simonzhang', '123456');

　　建立监听，和url，启停脚本见上次笔记。
main.py

#!/bin/python
#-*- coding:utf-8 -*-
# Filename:    main.py
# Revision:    1.0
# Date:        2012-06-14
# Author:      simonzhang
# web:         www.simonzhang.net
# Email:       simon-zzm@163.com
### END INIT INFO
import sys
import tornado.ioloop
import tornado.web
from login import *


# NOTE(review): the cookie secret is hard-coded in the source. For anything
# beyond a demo, load it from configuration or the environment instead.
application = tornado.web.Application([
    (r"/", LoginHandler),
],  cookie_secret="61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=")


if __name__ == "__main__":
    # The listening port is the only command-line argument.
    if len(sys.argv) < 2:
        sys.exit("Usage: python main.py <listen_port>")
    listen_port = int(sys.argv[1])
    application.listen(listen_port)
    tornado.ioloop.IOLoop.instance().start()

　　在同级目录下建立templates目录，在templates目录下建立login.html。login.html的源码是


   
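<html>
   <head>
      <title>{{title}}</title>
   </head>
   <body>
      <form action="/" method="post">
         <table>
            <tr>
               <td>User Name:</td>
               <td><input type="text" name="login_username" /></td>
            </tr>
            <tr>
               <td>Password:</td>
               <td><input type="password" name="login_password" /></td>
            </tr>
            <tr>
               <td colspan="2"><input type="submit" value="Login" /></td>
            </tr>
         </table>
      </form>
   </body>
</html>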

　　做一个简单的验证页面，只是个简单判断，学习使用，权限认证和cookie部分不做记录了。编辑login.py源码

#!/bin/python
#-*- coding:utf-8 -*-
# Filename:    login.py
# Revision:    1.0
# Date:        2012-06-14
# Author:      simonzhang
# web:         www.simonzhang.net
# Email:       simon-zzm@163.com
### END INIT INFO
import sys
import tornado.ioloop
import tornado.locale
import tornado.web
import tornado.database
from dbmodel import *


class LoginHandler(tornado.web.RequestHandler):
    """Serve the login form and check submitted credentials.

    GET renders the form. POST compares the submitted password with the one
    ``get_passwd`` returns for the submitted user name, greets the user on a
    match and shows the form again otherwise.
    """

    def _render_login(self):
        # Every failure path shows the same form again.
        self.render("templates/login.html", title="simonzhan.net")

    def get(self):
        self._render_login()

    def post(self):
        # A default of None turns a missing field into an ordinary failed
        # login instead of an exception.
        name = self.get_argument("login_username", None)
        passwd = self.get_argument("login_password", None)
        if name is None or passwd is None:
            self._render_login()
            return
        try:
            stored_passwd = get_passwd(name)
        except Exception:
            # Best effort, as before: a failed lookup is presented as a
            # failed login rather than an error page.
            stored_passwd = None
        # Compare the submitted password with the stored one, and greet by
        # user name so the password is never sent back in the response.
        if stored_passwd is not None and passwd == stored_passwd:
            self.write("hello %s" % (name))
        else:
            self._render_login()

　　因为要查数据，所以要用到database，将数据库部分放到一个文件中去。编辑dbmodel.py源码如下：

#!/bin/env python
# -*- coding: utf-8 -*-
# Filename:    dbmodel.py
# Revision:    1.0
# Date:        2012-06-14
# Author:      simonzhang
# web:         www.simonzhang.net
# Email:       simon-zzm@163.com
### END INIT INFO
from tornado import database


def get_passwd(user_name):
    """Return the stored password for ``user_name``, or None if not found.

    Opens a new database connection on every call and closes it before
    returning.
    """
    # NOTE(review): tornado.database.Connection appears to take
    # (host, database, user, password); as written "123456" would be the
    # user and "simonzhang" the password, which looks swapped - confirm.
    db = database.Connection("192.168.1.41","test_tornado","123456","simonzhang")
    try:
        # Pass the value as a query parameter so the driver escapes it;
        # building the SQL with "%" formatting allowed SQL injection.
        rows = db.query("SELECT user,passwd from user where user=%s", user_name)
    finally:
        db.close()
    for projects in rows:
        return projects.passwd
    return None

　　启动服务，在ie里能看到页面，输入正确的账户密码，可以看到经典的话了。基本框架完成，剩下的慢慢学习，慢慢发挥了。

4月 25

学习 python 编写规范 pep8 的问题笔记

Posted on 2012 年 4 月 25 日 by 张子萌

　　在学习过程中有如下问题，做个记录。
　　以前没有注意的问题
1)
一行列数 : PEP 8 规定为 79 列，这个太苛刻了，如果要拼接url一般都会超。
一个函数 : 不要超过 30 行代码, 即可显示在一个屏幕内，可以不使用垂直游标即可看到整个函数。
一个类 : 不要超过 200 行代码，不要有超过 10 个方法。
一个模块 : 不要超过 500 行。

2)不要在一句import中多个库
不推荐
import os, sys

simonzhang的家

有朋自远方来。。。。。

Tag Archives: import sys

linux下查看使用硬盘IO过高的进程

python 源码删除注释并编译成字节码

tornado学习笔记（二）

学习 python 编写规范 pep8 的问题笔记

2025年七月
一	二	三	四	五	六	日
« 1月
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30	31