Sharing a small Python script for real-time log analysis

  • 2020-05-30 20:33:56
  • OfStack

preface

As we all know, anyone operating and maintaining Web services should keep a constant eye on the real-time indicators of the relevant domain names: 2xx/s, 4xx/s, 5xx/s, response time, bandwidth and so on. Previously, the logs were split every 5 minutes or every minute. Now that they are split only once a day, it is obviously no longer appropriate to keep using shell, so I wrote the script in Python.

The method is as follows:

The script mainly uses the seek and tell methods of the file object. It works as follows:

1. Add a crontab entry that runs the script once every 5 minutes

2. Analyze only the part of the log between the end position recorded by the previous run and the current end of the log file, and write out the results
You can then use zabbix_sender to send the results to the Zabbix server, or have the Zabbix agent read the result file directly to fetch the data. The code is as follows:


#!/usr/bin/env python
#coding: utf-8

from __future__ import division
import os

# Access log to analyse.
LOG_FILE = '/data0/logs/nginx/xxxx-access_log'
# Stores the start/end byte offsets used by the previous run.
POSITION_FILE = '/tmp/position.log'
# The analysis result is written to this file.
STATUS_FILE = '/tmp/http_status'
# Interval between crontab runs, in seconds (the script runs every 5 minutes).
# Counters are divided by this value to turn them into per-second rates.
CRON_TIME = 300

def _save_position(position_file, start_position, end_position):
    """Overwrite *position_file* with the two offsets, one ``name: value`` per line."""
    with open(position_file, 'w') as fh:
        fh.write('start_position: %s\n' % start_position)
        fh.write('end_position: %s\n' % end_position)


def _read_last_end(position_file):
    """Return the saved end offset as an int, or None if the file is malformed."""
    with open(position_file) as fh:
        saved = fh.readlines()
    # A valid position file has exactly two lines: start_position, end_position.
    if len(saved) != 2:
        return None
    try:
        return int(saved[1].split(':')[1])
    except (IndexError, ValueError):
        return None


def get_position(log_file=None, position_file=None):
    """Work out which byte range of the log still has to be analysed.

    The end offset saved by the previous run becomes the start offset of this
    run, and the current size of the log becomes the new end offset. The new
    pair is saved to the position file before returning.

    Args:
        log_file: path of the log to analyse; defaults to ``LOG_FILE``.
        position_file: path of the offset bookkeeping file; defaults to
            ``POSITION_FILE``.

    Returns:
        ``[start_position, end_position]`` as integers.

    The process is terminated with ``os._exit(1)`` instead of returning when:
      * the position file does not exist yet (first run): the current log size
        is recorded and nothing is analysed;
      * the position file is malformed: it is removed so that the next run
        starts afresh;
      * the log has not grown since the previous run.
    """
    if log_file is None:
        log_file = LOG_FILE
    if position_file is None:
        position_file = POSITION_FILE

    # First run: no saved offset yet, only record the current size.
    if not os.path.exists(position_file):
        _save_position(position_file, 0, os.path.getsize(log_file))
        os._exit(1)

    start_position = _read_last_end(position_file)
    if start_position is None:
        os.remove(position_file)
        os._exit(1)

    end_position = os.path.getsize(log_file)
    # Offsets are compared as integers; comparing them as strings is wrong
    # ('99772400' > '100227572' is True).
    if start_position > end_position:
        # The log was rotated: the file is now smaller than the saved offset,
        # so read the new file from its beginning.
        start_position = 0
    elif start_position == end_position:
        # Nothing new was written since the previous run.
        os._exit(1)

    _save_position(position_file, start_position, end_position)
    return [start_position, end_position]

def write_status(content, status_file=None):
    """Overwrite the status file with *content*.

    Args:
        content: text to write; any previous content of the file is replaced.
        status_file: path of the file to write; defaults to ``STATUS_FILE``.
    """
    if status_file is None:
        status_file = STATUS_FILE
    # The context manager closes the file even if the write fails.
    with open(status_file, 'w') as fh:
        fh.write(content)

def handle_log(start_position, end_position, log_file=None, cron_time=None,
               writer=None):
    """Analyse the log lines between two byte offsets and report the result.

    Each line is split on single spaces; field 3 is read as the request time
    (a trailing ``s`` is stripped), field 10 as the HTTP status and field 11
    as the number of bytes sent. Lines with fewer than 12 fields are skipped.

    The report contains, one ``name: value`` per line: the per-second rate of
    200/206 responses (``status_2xx``), of 403, 404, 500, 502, 503 and 504
    responses, of all requests (``status_all``), the average request time
    (``rt``, 0.0 when no request was counted) and the bytes sent per second
    (``bandwidth``).

    Args:
        start_position: byte offset at which to start reading.
        end_position: byte offset at which to stop; reading stops once the
            file position reaches or passes it.
        log_file: path of the log; defaults to ``LOG_FILE``.
        cron_time: length of the sampling period in seconds, used as the
            divisor for the per-second rates; defaults to ``CRON_TIME``.
        writer: callable that receives the formatted report; defaults to
            ``write_status``.
    """
    if log_file is None:
        log_file = LOG_FILE
    if cron_time is None:
        cron_time = CRON_TIME
    if writer is None:
        writer = write_status

    tracked = (
        (b'403', 'status_403'),
        (b'404', 'status_404'),
        (b'500', 'status_500'),
        (b'502', 'status_502'),
        (b'503', 'status_503'),
        (b'504', 'status_504'),
    )
    counts = dict((code, 0) for code, _ in tracked)
    status_2xx = status_all = bandwidth = 0
    rt = 0.0

    # Binary mode: the offsets come from file sizes in bytes, and only a
    # binary file guarantees that seek()/tell() work in plain byte offsets.
    with open(log_file, 'rb') as log:
        log.seek(start_position, 0)
        while log.tell() < end_position:
            line = log.readline()
            if not line:
                # The file ended before end_position (e.g. it was truncated
                # after its size was measured); stop instead of looping forever.
                break
            fields = line.split(b' ')
            if len(fields) < 12:
                # Not a complete access-log record.
                continue
            request_time, status, bytes_sent = fields[3], fields[10], fields[11]
            status_all += 1
            try:
                rt += float(request_time.strip(b's'))
                bandwidth += int(bytes_sent)
            except ValueError:
                # Best effort: a non-numeric field only loses its contribution
                # to the timing/bandwidth totals, the request is still counted.
                pass
            if status in (b'200', b'206'):
                status_2xx += 1
            elif status in counts:
                counts[status] += 1

    # float() keeps the rates fractional regardless of the division mode.
    period = float(cron_time)
    rows = [('status_2xx', status_2xx / period)]
    rows.extend((label, counts[code] / period) for code, label in tracked)
    rows.append(('status_all', status_all / period))
    rows.append(('rt', rt / status_all if status_all else 0.0))
    rows.append(('bandwidth', bandwidth / period))

    writer(''.join('%s: %s\n' % row for row in rows))

if __name__ == '__main__':
    # Determine the byte range that has not been analysed yet, then analyse
    # it and write out the report.
    offsets = get_position()
    start_position, end_position = offsets
    handle_log(start_position, end_position)

Take a look at the results of the analysis:


cat /tmp/http_status
status_2xx: 17.3333333333
status_403: 0.0
status_404: 1.0
status_500: 0.0
status_502: 0.0
status_503: 0.0
status_504: 0.0
status_all: 20.0
rt: 0.0782833333333
bandwidth: 204032.0

Later, I found a problem: comparing start_position and end_position as strings gives wrong results, as shown below:


In [5]: '99772400' > '100227572'
Out[5]: True

In [6]: int('99772400') > int('100227572')
Out[6]: False

Therefore, the correction is as follows:


# After log rotation the file is smaller than the saved offset
# (start_position > end_position), so read the new file from its beginning.
# Both values are converted to int because comparing them as strings is wrong.
if int(start_position) > int(end_position):
 start_position = 0
# The log has not grown since the last run: there is nothing to analyse.
elif int(start_position) == int(end_position):
 os._exit(1)

conclusion


Related articles: