python分析nginx日志的ip(来源)

发布时间:2019-09-12 08:00:01编辑:auto阅读(1258)

    #!/usr/bin/env python
    #_*_coding:utf-8 _*_
    __author__ = 'gaogd'
    
    import datetime,threading
    import sys, os, urllib2, json
    # Python 2 only: reload sys and then switch the interpreter's default
    # string encoding to UTF-8. NOTE(review): presumably needed so the
    # non-ASCII location names returned by the lookup service can be
    # formatted and written without explicit encoding - confirm.
    reload(sys)
    sys.setdefaultencoding('utf8')
    
    ips = {}  # maps each IP address to the number of log lines it appears on
    iplist = []  # every accepted IP in log order; repeated addresses are kept
    # The log file is expected in the current working directory, next to the script.
    with open("./ip.txt", "r") as log_file:  # context manager closes the handle after reading
        fh = log_file.readlines()


    for line in fh:
        ip = line.split(" ")[0]  # first space-separated field of the log line
        # Accept only fields that are 7 to 15 characters long; anything else
        # (including the empty field of a blank line) is skipped.
        if 6 < len(ip) <= 15:
            ips[ip] = ips.get(ip, 0) + 1
            iplist.append(ip)
    
    def get_ip_area(ip,num):
        """Look up the location of ``ip`` and append it to a per-batch text file.

        Requests ``http://ip.taobao.com/service/getIpInfo.php`` for the address
        and decodes the JSON reply. When the reply's ``code`` is 0, the
        country, area, region, city and ip fields of its ``data`` member are
        printed and appended as one comma-separated line to
        ``./checkiparea<num>.txt``. For any other code the ``data`` member is
        printed instead.

        Exceptions derived from ``Exception`` (network errors, bad JSON,
        missing keys, write failures) are caught and printed rather than
        propagated, so one failed lookup does not stop the caller's loop.

        :param ip: IP address to look up.
        :param num: value interpolated into the output file name.
        """
        try:
            apiurl = "http://ip.taobao.com/service/getIpInfo.php?ip=%s" %ip
            content = urllib2.urlopen(apiurl).read()
            reply = json.loads(content)  # decode the response once, not once per key
            data = reply['data']
            code = reply['code']
            if code == 0: # success
                fields = (data['country'], data['area'], data['region'], data['city'], data['ip'])
                # Space-separated echo of the record for progress monitoring.
                print("%s %s %s %s %s" % fields)
                record = "%s,%s,%s,%s,%s\n" % fields
                out_path = './checkiparea%s.txt' % num
                with open(out_path, 'a+') as f:
                    f.write(record)
            else:
                print(data)
        except Exception as ex:
            # Best effort: report the failure and let the caller continue.
            print(ex)
    
    
    
    def getip(num,num2):
        """Resolve every address in ``iplist[num:num2]``.

        Each address is passed to ``get_ip_area`` together with ``num``, so all
        results of one call share the same ``num`` value.
        """
        batch = iplist[num:num2]
        for address in batch:
            get_ip_area(address, num)
    
    if __name__ == '__main__':
        # Split iplist into consecutive, non-overlapping batches and resolve
        # each batch in its own thread. The slice end is start + batch_size:
        # the previous end, start * (1 + i), produced an empty slice for the
        # first batch and overlapping slices for all later ones.
        batch_size = 10000
        for number in range(0, len(iplist), batch_size):
            t = threading.Thread(target=getip, args=(number, number + batch_size))
            t.start()



    Python 脚本如下:

    #!/usr/bin/env python
    #_*_coding:utf-8 _*_
    __author__ = 'gaogd'
    
    
    import MySQLdb as mysql
    import datetime
    import sys, os, urllib2, json
    
    # Database connection settings.
    # NOTE(review): credentials are hard-coded in the script; consider loading
    # them from configuration or the environment.
    db = mysql.connect(user="root",passwd="xxxx@2015",db="intest",host="192.168.10.12")
    db.autocommit(True)
    cur = db.cursor()
    cur.execute('set names utf8')
    addtime = datetime.datetime.now()  # one timestamp shared by every row written in this run

    ips = {}        # maps each IP address to the number of log lines it appears on
    iplist = []     # every accepted IP in log order; repeated addresses are kept
    # The log file is expected in the current working directory, next to the script.
    with open("./access_test.log", "r") as log_file:  # context manager closes the handle after reading
        fh = log_file.readlines()
    for line in fh:
        ip = line.split(" ")[0]  # first space-separated field of the log line
        print(ip)
        # Accept only fields that are 7 to 15 characters long.
        if 6 < len(ip) <= 15:
            ips[ip] = ips.get(ip, 0) + 1
            iplist.append(ip)

    # Values come straight from the log file, so they are passed as query
    # parameters and escaped by the driver instead of being formatted into
    # the SQL text.
    sql = 'insert into ipinfo(ipaddress,countip) values (%s,%s)'
    for key, value in ips.items():
        cur.execute(sql, (key, value))
    
    def get_ip_area(ip):
        """Look up the location of ``ip`` and store it in the ``whereip`` table.

        Requests ``http://ip.taobao.com/service/getIpInfo.php`` for the address
        and decodes the JSON reply. When the reply's ``code`` is 0, the
        country, area, region, city and ip fields of its ``data`` member are
        printed and inserted, together with the module-level ``addtime``
        timestamp, through the module-level cursor ``cur``. For any other code
        the ``data`` member is printed instead.

        Exceptions derived from ``Exception`` are caught and printed rather
        than propagated, so one failed lookup does not stop the caller's loop.

        :param ip: IP address to look up.
        """
        try:
            apiurl = "http://ip.taobao.com/service/getIpInfo.php?ip=%s" %ip
            content = urllib2.urlopen(apiurl).read()
            reply = json.loads(content)  # decode the response once, not once per key
            data = reply['data']
            code = reply['code']
            if code == 0: # success
                fields = (data['country'], data['area'], data['region'], data['city'], data['ip'])
                # Space-separated echo of the record for progress monitoring.
                print("%s %s %s %s %s" % fields)
                # The values come from a remote service, so they are passed as
                # query parameters and escaped by the driver instead of being
                # formatted into the SQL text.
                sql = 'insert into whereip (country,area,region,city,ip,time) values (%s,%s,%s,%s,%s,%s)'
                params = tuple(field.encode("utf-8") for field in fields) + (addtime,)
                cur.execute(sql, params)
                print('sql: %s' % sql)
            else:
                print(data)
        except Exception as ex:
            # Best effort: report the failure and let the caller continue.
            print(ex)
    
    if __name__ == '__main__':
        # Resolve the addresses one by one, in log order; an address that
        # occurs several times in iplist is looked up once per occurrence.
        for address in iplist:
            get_ip_area(address)
            
            
    ################################################
    '''
    
      
    radiansdict.get(key, default=None)
    返回指定键的值,如果键不在字典中返回default值
    
    '''

    测试淘宝的ip库,拿到的数据内容如下:

    http://ip.taobao.com/service/getIpInfo.php?ip=66.249.65.183

    wKiom1fHo9_wi3A4AAA-4sXTDwg946.png-wh_50

    将获取到的字典在 http://tool.oschina.net/codeformat/json 进行格式化,得到如下格式:
    {
        "code": 0, 
        "data": {
            "country": "中国", 
            "country_id": "CN", 
            "area": "华南", 
            "area_id": "800000", 
            "region": "广东省", 
            "region_id": "440000", 
            "city": "深圳市", 
            "city_id": "440300", 
            "county": "", 
            "county_id": "-1", 
            "isp": "电信", 
            "isp_id": "100017", 
            "ip": "113.97.193.87"
        }}


    创建数据库和数据表的语句:

    mysql> create database intest ;
    Query OK, 1 row affected (0.00 sec)
    
    mysql> use intest;
    Database changed
    mysql> create table ipinfo(id int auto_increment primary key,ipaddress varchar(200),countip int);
    Query OK, 0 rows affected (0.22 sec)
    
    mysql> create table whereip(id int primary key auto_increment,country varchar(100),area varchar(100),region varchar(100),city varchar(100),ip varchar(100),time datetime);
    Query OK, 0 rows affected (0.20 sec)
    mysql>
    
    mysql> desc ipinfo;
    +-----------+--------------+------+-----+---------+----------------+
    | Field     | Type         | Null | Key | Default | Extra          |
    +-----------+--------------+------+-----+---------+----------------+
    | id        | int(11)      | NO   | PRI | NULL    | auto_increment |
    | ipaddress | varchar(200) | YES  |     | NULL    |                |
    | countip   | int(11)      | YES  |     | NULL    |                |
    +-----------+--------------+------+-----+---------+----------------+
    3 rows in set (0.00 sec)
    
    mysql> desc whereip;
    +---------+--------------+------+-----+---------+----------------+
    | Field   | Type         | Null | Key | Default | Extra          |
    +---------+--------------+------+-----+---------+----------------+
    | id      | int(11)      | NO   | PRI | NULL    | auto_increment |
    | country | varchar(100) | YES  |     | NULL    |                |
    | area    | varchar(100) | YES  |     | NULL    |                |
    | region  | varchar(100) | YES  |     | NULL    |                |
    | city    | varchar(100) | YES  |     | NULL    |                |
    | ip      | varchar(100) | YES  |     | NULL    |                |
    | time    | datetime     | YES  |     | NULL    |                |
    +---------+--------------+------+-----+---------+----------------+
    7 rows in set (0.00 sec)
    
    mysql>


    分析完日志查看数据库的内容:

    mysql> select * from whereip limit 10;
    +----+---------+--------+-----------+-----------+-----------------+---------------------+
    | id | country | area   | region    | city      | ip              | time                |
    +----+---------+--------+-----------+-----------+-----------------+---------------------+
    |  1 | 美国    |        |           |           | 66.249.65.133   | 2016-09-01 10:53:01 |
    |  2 | 美国    |        |           |           | 23.251.63.45    | 2016-09-01 10:53:01 |
    |  3 | 美国    |        |           |           | 23.251.63.45    | 2016-09-01 10:53:01 |
    |  4 | 中国    | 华南   | 广东省    | 广州市    | 119.130.71.153  | 2016-09-01 10:53:01 |
    |  5 | 美国    |        |           |           | 66.249.65.183   | 2016-09-01 10:53:01 |
    |  6 | 美国    |        |           |           | 66.249.65.180   | 2016-09-01 10:53:01 |
    |  7 | 美国    |        |           |           | 66.249.65.142   | 2016-09-01 10:53:01 |
    |  8 | 美国    |        |           |           | 107.151.226.203 | 2016-09-01 10:53:01 |
    |  9 | 美国    |        |           |           | 107.151.226.203 | 2016-09-01 10:53:01 |
    | 10 | 美国    |        |           |           | 66.249.65.134   | 2016-09-01 10:53:01 |
    +----+---------+--------+-----------+-----------+-----------------+---------------------+
    10 rows in set (0.00 sec)
    
    mysql>


关键字