python解析Nginx访问日志
2024-10-15 06:44:45
环境说明
python3+
pip install geoip2==2.9.0
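nginx日志配置成json格式,配置如下:
(注意:建议在 log_format 名称后加上 escape=json 参数(nginx 1.11.8 及以上版本支持),否则字段中出现双引号等字符时会被转义成 \x22,生成的行不是合法的JSON,脚本解析时会直接跳过这些行。)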
log_format json_log '{ "time": "$time_local", '
'"remote_addr": "$remote_addr", '
'"remote_user": "$remote_user", '
'"body_bytes_sent": "$body_bytes_sent", '
'"request_time": "$request_time", '
'"status": "$status", '
'"request": "$request", '
'"request_method": "$request_method", '
'"http_referrer": "$http_referer", '
'"body_bytes_sent":"$body_bytes_sent", '
'"http_x_forwarded_for": "$http_x_forwarded_for", '
'"http_user_agent": "$http_user_agent"}';
配置日志成json格式
生成的日志如下:
配置脚本
#encoding: utf-8
"""Summarise an nginx access log written as one JSON object per line.

Usage: python <this script> <access-log-file>

Each log line is expected to be a JSON object with at least the keys
``time`` (nginx ``$time_local``), ``remote_addr``, ``status`` and
``body_bytes_sent``.  The script prints total hits, distinct client IPs,
total traffic, the top regions (via a MaxMind GeoLite2 City database located
at ``db/GeoLite2-City.mmdb`` next to this file), the top client IPs and the
status-code distribution.
"""
import os
import sys
import json
from datetime import datetime

try:
    from geoip2.database import Reader
except ImportError:
    # geoip2 is only needed by stat_region(); keep the parsing helpers usable
    # without it and fail with a clear message when a lookup is requested.
    Reader = None

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# nginx always writes English month abbreviations in $time_local; mapping
# them by hand keeps the parser independent of the process locale
# (strptime's %b is locale-sensitive).
_MONTHS = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}


def _parse_day(time_local):
    """Convert an nginx ``$time_local`` value to a ``YYYY-MM-DD`` string.

    Accepts ``29/Nov/2018:10:20:30 +0800``; everything after the first ``:``
    (time of day and UTC offset) is ignored, so the day is the one written
    in the log, not a UTC-normalised one.

    Raises:
        ValueError, KeyError, AttributeError: if the value is malformed.
    """
    date_part = time_local.split(':', 1)[0]
    day, month, year = date_part.split('/')
    return datetime(int(year), _MONTHS[month], int(day)).strftime('%Y-%m-%d')


def stat_days(infile):
    """Aggregate the log file per calendar day.

    Args:
        infile: path of a UTF-8 log file with one JSON object per line.

    Returns:
        A list of ``(day, stats)`` tuples sorted by ``day`` (``YYYY-MM-DD``),
        where ``stats`` is a dict with the keys ``hits`` (request count),
        ``vistors`` (client IP -> request count; the key spelling is kept
        because callers read it), ``status`` (status code -> count) and
        ``bytes`` (sum of ``body_bytes_sent``; non-numeric values count
        as 0).

    Lines that are not valid JSON objects, or that lack a required field,
    are skipped entirely.
    """
    day_data = {}
    with open(infile, 'r', encoding="utf-8") as fhandler:
        # Iterate lazily: access logs can be far larger than memory.
        for line in fhandler:
            try:
                record = json.loads(line.strip('\n'))
                # Extract every field before touching the counters so that a
                # malformed record can never be counted partially.
                _day = _parse_day(record['time'])
                _ip = record['remote_addr']
                _status = record['status']
                _sent = str(record['body_bytes_sent'])
                _size = int(_sent) if _sent.isdigit() else 0

                _stat = day_data.setdefault(
                    _day, {'hits': 0, 'vistors': {}, 'status': {}, 'bytes': 0})
                _stat['vistors'][_ip] = _stat['vistors'].get(_ip, 0) + 1
                _stat['status'][_status] = _stat['status'].get(_status, 0) + 1
            except (ValueError, KeyError, TypeError, AttributeError):
                # Bad JSON, non-object line, missing key or malformed value.
                continue
            _stat['hits'] += 1
            _stat['bytes'] += _size
    return sorted(day_data.items(), key=lambda x: x[0])


def stat_total(days):
    """Merge per-day statistics into one overall summary.

    Args:
        days: iterable of ``(day, stats)`` tuples as returned by stat_days().

    Returns:
        A dict with the same keys as a single day's ``stats``
        (``hits``, ``vistors``, ``status``, ``bytes``) summed over all days.
    """
    total_data = {'hits': 0, 'vistors': {}, 'status': {}, 'bytes': 0}
    for _day, _stat in days:
        total_data['hits'] += _stat['hits']
        total_data['bytes'] += _stat['bytes']
        for _ip, _cnt in _stat['vistors'].items():
            total_data['vistors'][_ip] = total_data['vistors'].get(_ip, 0) + _cnt
        for _status, _cnt in _stat['status'].items():
            total_data['status'][_status] = total_data['status'].get(_status, 0) + _cnt
    return total_data


def stat_region(total_data):
    """Count requests per "country/city" using the GeoLite2 City database.

    Args:
        total_data: summary dict as returned by stat_total(); only its
            ``vistors`` mapping (IP -> request count) is read.

    Returns:
        A dict mapping ``"<country>/<city>"`` (zh-CN names, empty string when
        the database has no zh-CN name) to the number of requests.

    Raises:
        ImportError: if the geoip2 package is not installed.

    IPs the database cannot resolve are reported on stdout and skipped.
    """
    if Reader is None:
        raise ImportError(
            "geoip2 is required for region statistics (pip install geoip2)")

    region_data = {}
    # Open the MaxMind mmdb file shipped next to this script.
    geoip2_reader = Reader(os.path.join(BASE_DIR, 'db', 'GeoLite2-City.mmdb'))
    try:
        for _ip, _cnt in total_data['vistors'].items():
            try:
                _city = geoip2_reader.city(_ip)
                # To restrict the report to mainland China, skip records whose
                # _city.country.names.get('zh-CN', '') is not '中国' here.
                _city_name = '{}/{}'.format(
                    _city.country.names.get('zh-CN', ''),
                    _city.city.names.get('zh-CN', ''))
                region_data[_city_name] = region_data.get(_city_name, 0) + _cnt
            except Exception as err:
                # Best effort: an unknown or invalid address must not abort
                # the whole report.
                print(err)
    finally:
        # Release the database file even if the loop fails unexpectedly.
        geoip2_reader.close()
    return region_data


def formatSize(bytes):
    """Render a byte count as a string in K, M or G (powers of 1024).

    Args:
        bytes: the size in bytes (any value accepted by ``float()``).  The
            parameter name shadows the builtin but is kept for callers that
            pass it by keyword.

    Returns:
        ``"<value> K"`` below 1 MiB, ``"<value> M"`` below 1 GiB, otherwise
        ``"<value> G"``; the value is an unrounded float.
    """
    size = float(bytes) / 1024
    for unit in ('K', 'M'):
        if size < 1024:
            return "{} {}".format(size, unit)
        size /= 1024
    return "{} G".format(size)


def main(infile):
    """Analyse ``infile`` and print the report to stdout."""
    # Collect all statistics first.
    day_data = stat_days(infile)        # per-day figures
    total_data = stat_total(day_data)   # overall figures
    region_data = sorted(stat_region(total_data).items(),
                         key=lambda x: x[1], reverse=True)
    status_data = total_data['status']

    access_num = total_data['hits']
    ip_num = len(total_data['vistors'])
    ip_detail = sorted(total_data['vistors'].items(),
                       key=lambda x: x[1], reverse=True)
    traffic = formatSize(total_data['bytes'])

    print("\n总访问量: {}\n总IP数: {}\n总流量: {} ".format(
        access_num, ip_num, traffic))

    print('\n-------Top 15 地区访问分布-------')
    for region in region_data[0:15]:
        print("{}:{}".format(region[0], region[1]))

    print('\n-------Top 15 ip访问-------')
    for ip in ip_detail[0:15]:
        print("{} {}".format(ip[0], ip[1]))

    print('\n-------状态码情况-------')
    for code, cnt in status_data.items():
        print("{} {}".format(code, cnt))


if __name__ == "__main__":
    # Read the command line only when run as a script so that importing this
    # module never fails on a missing argument.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <nginx-json-access-log>".format(sys.argv[0]))
    logfile = sys.argv[1]
    main(logfile)
logganalysis.py
最新文章
- jQueryAjax笔记
- iPad开发--美团界面的搭建(主要是对Popover的使用,以及监听)
- NeHe OpenGL教程 第八课:混合
- poj 2100 Graveyard Design
- 【暑假】[实用数据结构]UVAlive 3644 X-Plosives
- LNK1123: 转换到 COFF 期间失败: 文件无效或损坏[汇总]
- Jquery 获取日期date()对象
- db2的select语句在db2 client上执行正确,JDBC连接数据库时报错
- Struts2详解
- [转载] Dubbo实现RPC调用使用入门
- 进击Node.js基础(一)
- mui 记录
- 简单的NIO使用实例
- wpf-x-指令元素
- mysql双主+keepalived【转】
- MVC5+Easyui1.3.6+EF6 开发部分备忘笔记
- HTML5进阶
- Drying
- 13.A={1,2,3,5}和为10的问题
- c#基础学习(0626)之占位符、转义符