Processing Large Log Files in Python

  • 2021-07-26 08:03:27
  • OfStack

This article shares example Python code for processing large log files: a small `tail -f`-style class that prints the last lines of a file and then keeps printing new lines as they are appended. The code is given below for your reference.


# coding=utf-8
import sys
import time
 
class Tail(object):
    """Follow a file the way ``tail -f`` does.

    The last ``n`` lines are handed to ``callback`` first; after that every
    line appended to the file is handed to ``callback`` as it appears.

    Args:
        file_name: Path of the file to follow.
        callback: Callable invoked with one piece of text per line.
            Defaults to ``sys.stdout.write``.
        encoding: Encoding used to decode the bytes read from the file.
            Undecodable bytes are replaced rather than raising.
    """

    # Initial guess of the average line length in bytes. It only sizes the
    # first read window; the window is doubled until it holds enough lines.
    _LINE_LENGTH_GUESS = 100

    def __init__(self, file_name, callback=sys.stdout.write, encoding='utf-8'):
        self.file_name = file_name
        self.callback = callback
        self.encoding = encoding

    def _to_text(self, data):
        """Return ``data`` as ``str``, decoding it first when it is not one."""
        if isinstance(data, str):
            return data
        return data.decode(self.encoding, 'replace')

    def follow(self, n=10):
        """Emit the last ``n`` lines, then emit appended lines indefinitely.

        If the file cannot be opened, a message and the error are printed
        and the method returns. Otherwise it loops until interrupted;
        exceptions raised by ``callback`` propagate to the caller.

        Args:
            n: Number of trailing lines to emit before following.
        """
        try:
            # Binary mode: text-mode files do not support the non-zero
            # end-relative seeks that showLastLine relies on.
            f = open(self.file_name, 'rb')
        except (IOError, OSError) as e:
            print(' Failed to open the file, see if the file does not exist, or there is a problem with permissions ')
            print(e)
            return
        with f:
            self._file = f
            f.seek(0, 2)
            # Size of the file, in bytes, at the moment following starts.
            self.file_length = f.tell()
            self.showLastLine(n)
            # showLastLine leaves the position at the end of what it read,
            # so readline() only ever returns newly appended data.
            while True:
                line = f.readline()
                if line:
                    self.callback(self._to_text(line))
                else:
                    # Sleep only when idle so a burst of new lines is
                    # delivered immediately instead of one per second.
                    time.sleep(1)

    def showLastLine(self, n):
        """Pass the last ``n`` lines of ``self._file`` to ``callback``.

        Reads a window from the end of the file and doubles it until it
        holds more than ``n`` newlines or covers the whole file. Each line
        is emitted with a single trailing ``'\\n'``. Nothing is emitted when
        ``n`` is not positive or the file is empty.

        Expects ``self._file`` to be open in binary mode and
        ``self.file_length`` to hold its size in bytes.

        Args:
            n: Maximum number of trailing lines to emit.
        """
        if n <= 0:
            return
        window = self._LINE_LENGTH_GUESS * n
        while True:
            if window >= self.file_length:
                # The window covers the whole file: read all of it.
                self._file.seek(0)
                text = self._to_text(self._file.read())
                break
            self._file.seek(-window, 2)
            text = self._to_text(self._file.read(window))
            # More than n newlines guarantees that the last n lines are
            # complete even though the window may start in mid-line.
            if text.count('\n') > n:
                break
            # Doubling always grows the window, so the loop terminates.
            window *= 2
        if not text:
            return
        # Drop the final newline so it does not produce a bogus empty line.
        if text.endswith('\n'):
            text = text[:-1]
        for line in text.split('\n')[-n:]:
            self.callback(line + '\n')
if __name__ == '__main__':
    # Script entry point: show the last 20 lines of test.txt, then keep
    # printing whatever gets appended to it.
    tail_reader = Tail(file_name='test.txt')
    tail_reader.follow(n=20)

Related articles: