Sharing a small Python script for real-time log analysis
- 2020-05-30 20:33:56
- OfStack
preface
As we all know, Web operations staff should always keep an eye on the real-time indicators of the relevant domain names, such as 2xx/s, 4xx/s, 5xx/s, response time, and bandwidth. Previously, the logs were split every 5 minutes or every 1 minute. Now that they are split only once a day, it is obviously no longer appropriate to keep using Shell, so I wrote the analysis in Python.
The method is as follows:
The script mainly uses the seek and tell functions of the file object. The principle is as follows:
1. Add a crontab entry that runs the script once every 5 minutes
2. Analyze only the log lines between the end-of-file position recorded by the previous run and the end-of-file position at the current run, and output the results
You can use zabbix_sender to send the results to the Zabbix server, or have the Zabbix agent read the file directly to fetch the data. The code is as follows:
#!/usr/bin/env python
#coding: utf-8
from __future__ import division
import os
# nginx access log that handle_log() reads and analyses.
LOG_FILE = '/data0/logs/nginx/xxxx-access_log'
# Bookkeeping file in which get_position() stores the byte range
# (start_position / end_position) of the most recent run.
POSITION_FILE = '/tmp/position.log'
# Output file that write_status() overwrites with the computed metrics.
STATUS_FILE = '/tmp/http_status'
# Interval between two crontab executions, in seconds (every 5 minutes).
# Counters are divided by this value to turn them into per-second rates.
CRON_TIME = 300
def _write_position(position_file, start_position, end_position):
    """Persist the byte range that the current run is about to analyse."""
    with open(position_file, 'w') as fh:
        fh.write('start_position: %s\n' % start_position)
        fh.write('end_position: %s\n' % end_position)


def get_position(log_file=None, position_file=None):
    """Work out which byte range of the log this run has to analyse.

    The range starts where the previous run stopped (the ``end_position``
    stored in the position file) and ends at the current size of the log.
    The new range is written back to the position file before returning.

    Args:
        log_file: Path of the log to measure. Defaults to ``LOG_FILE``.
        position_file: Path of the bookkeeping file. Defaults to
            ``POSITION_FILE``.

    Returns:
        A two-item list ``[start_position, end_position]`` of integer byte
        offsets.

    Raises:
        SystemExit: With status 1 when there is nothing to analyse: on the
            very first run (the position file is only initialised), when
            the position file is malformed (it is removed so that the next
            run starts afresh), or when the log has not grown.
    """
    if log_file is None:
        log_file = LOG_FILE
    if position_file is None:
        position_file = POSITION_FILE

    end_position = os.path.getsize(log_file)

    # First run: there is no previous offset, so only record a baseline.
    if not os.path.exists(position_file):
        _write_position(position_file, 0, end_position)
        raise SystemExit(1)

    with open(position_file) as fh:
        entries = fh.readlines()

    # Anything other than two "name: <int>" lines is treated as corruption.
    # Parsing to int right here also guarantees that the comparisons below
    # are numeric; comparing the raw strings would wrongly report
    # '99772400' > '100227572'.
    try:
        if len(entries) != 2:
            raise ValueError('expected exactly two lines')
        _last_start, last_end = [int(item.split(':')[1]) for item in entries]
    except (IndexError, ValueError):
        os.remove(position_file)
        raise SystemExit(1)

    start_position = last_end
    if start_position > end_position:
        # The log is now shorter than the stored offset, i.e. it was
        # rotated or truncated: read the new file from the beginning.
        start_position = 0
    elif start_position == end_position:
        # The log has not grown since the previous run.
        raise SystemExit(1)

    _write_position(position_file, start_position, end_position)
    return [start_position, end_position]
def write_status(content, status_file=None):
    """Overwrite the status file with ``content``.

    Args:
        content: Text to store; it is written verbatim.
        status_file: Destination path. Defaults to ``STATUS_FILE``.
    """
    if status_file is None:
        status_file = STATUS_FILE
    # The context manager closes the handle even if write() raises.
    with open(status_file, 'w') as fh:
        fh.write(content)
def handle_log(start_position, end_position, log_file=None, cron_time=None,
               writer=None):
    """Analyse one byte range of the access log and publish the metrics.

    Every line that starts inside ``[start_position, end_position)`` is
    split on single spaces; field 3 is taken as the request time (an
    optional trailing ``s`` is stripped), field 10 as the HTTP status and
    field 11 as the number of bytes sent. The resulting report contains the
    per-second rate of 200/206, 403, 404, 500, 502, 503 and 504 responses,
    of all requests and of bytes sent, plus the mean request time.

    Args:
        start_position: Byte offset at which to start reading.
        end_position: Byte offset at which to stop reading.
        log_file: Path of the log. Defaults to ``LOG_FILE``.
        cron_time: Length of the sampling interval in seconds, used as the
            divisor for the rates. Defaults to ``CRON_TIME``.
        writer: Callable that receives the report text. Defaults to
            ``write_status``.
    """
    if log_file is None:
        log_file = LOG_FILE
    if cron_time is None:
        cron_time = CRON_TIME
    if writer is None:
        writer = write_status

    tracked = ('403', '404', '500', '502', '503', '504')
    counts = dict.fromkeys(tracked, 0)
    status_2xx = 0
    status_all = 0
    rt = 0.0
    bandwidth = 0

    # Binary mode keeps the offsets true byte offsets on every Python
    # version; the position is advanced by the length of each raw line.
    with open(log_file, 'rb') as log:
        log.seek(start_position, 0)
        position = start_position
        while position < end_position:
            raw = log.readline()
            if not raw:
                # The file ended before end_position (e.g. it was truncated
                # after the range had been computed).
                break
            position += len(raw)

            fields = raw.decode('utf-8', 'replace').split(' ')
            if len(fields) < 12:
                # Not a complete access-log record; ignore it rather than
                # abort the whole run.
                continue
            request_time, status, bytes_sent = fields[3], fields[10], fields[11]

            status_all += 1
            # The two values are parsed independently so that an
            # unparsable request time does not discard the byte count.
            try:
                rt += float(request_time.strip('s'))
            except ValueError:
                pass
            try:
                bandwidth += int(bytes_sent)
            except ValueError:
                pass

            if status in ('200', '206'):
                status_2xx += 1
            elif status in tracked:
                counts[status] += 1

    # Explicit float divisor: the rates are floats regardless of whether
    # true division is enabled for the enclosing module.
    interval = float(cron_time)
    report = ['status_2xx: %s' % (status_2xx / interval)]
    report.extend('status_%s: %s' % (code, counts[code] / interval)
                  for code in tracked)
    report.append('status_all: %s' % (status_all / interval))
    # Without any counted request there is no meaningful average.
    report.append('rt: %s' % (rt / status_all if status_all else 0.0))
    report.append('bandwidth: %s' % (bandwidth / interval))
    writer('\n'.join(report) + '\n')
if __name__ == '__main__':
    # Determine the byte range for this run and analyse exactly that range.
    handle_log(*get_position())
Take a look at the results of the analysis:
cat /tmp/http_status
status_2xx: 17.3333333333
status_403: 0.0
status_404: 1.0
status_500: 0.0
status_502: 0.0
status_503: 0.0
status_504: 0.0
status_all: 20.0
rt: 0.0782833333333
bandwidth: 204032.0
Later, I found a problem: start_position and end_position were being compared as strings, which gives the wrong result, as follows:
In [5]: '99772400' > '100227572'
Out[5]: True
In [6]: int('99772400') > int('100227572')
Out[6]: False
Therefore, the correction is as follows:
# After log rotation, start_position > end_position
#print start_position,end_position
if int(start_position) > int(end_position):
start_position = 0
# The log has not grown since the last run
elif int(start_position) == int(end_position):
os._exit(1)
conclusion