Example: using Python to parse Nginx access logs and save them to a MySQL database
- 2020-04-02 13:32:29
- OfStack
This example uses Python to parse Nginx access logs, split each line into fields according to the Nginx log format, and store the fields in a MySQL database.
I. The format of the Nginx access log is as follows:
$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for"   # this is the nginx default log format
II. A sample line from the Nginx access log looks like this:
182.19.31.129 - - [2013-08-13T00:00:01-07:00] "GET /css/anniversary.css HTTP/1.1" 304 0 "http://www.chlinux.net/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36" "-"
III. The Python code for analyzing the Nginx logs is as follows:
#!/usr/bin/env python
#coding:utf8
import os
import fileinput
import re
import sys
import MySQLdb
# Log location
LOG_PATH = "access_20130812.log"

# The log is expected in the nginx default format:
#   $remote_addr - $remote_user [$time_local] "$request" $status
#   $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for"
#
# Principle: the fields are separated by spaces and "-", so every field gets
# its own named-group fragment and the fragments are joined with the literal
# separators in nginxLogPattern below.

# Client address, e.g. 203.208.60.230
ipP = r"?P<ip>[\d.]*"
# Timestamp in square brackets, e.g. [21/Jan/2011:15:04:41 +0800].
# [^\[\]]* (anything but a bracket) keeps the greedy match from running on
# into a later bracketed item.
timeP = r"?P<time>\[[^\[\]]*\]"
# Request line in double quotes,
# e.g. "GET /EntpShop.do?method=view&shop_id=391796 HTTP/1.1".
# [^"]* (anything but a double quote) stops the match at the closing quote so
# the next quoted field is not swallowed.
requestP = r'?P<request>"[^"]*"'
statusP = r"?P<status>\d+"
bodyBytesSentP = r"?P<bodyByteSent>\d+"
# Referer in double quotes,
# e.g. "http://test.myweb.com/myAction.do?method=view&mod_id=&id=1346"
referP = r'?P<refer>"[^"]*"'
# User agent in double quotes,
# e.g. "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
userAgentP = r'?P<userAgent>"[^"]*"'
# A parenthesised group inside the user agent,
# e.g. (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
userSystems = re.compile(r"\([^()]*\)")
# A run of characters other than ")" that ends at a double quote. On a quoted
# user agent the first hit is the opening quote itself and the second hit is
# the text between the last ")" and the closing quote.
userlius = re.compile(r'[^)]*"')

# The separators are plain literal spaces, so re.VERBOSE must NOT be used
# here: in verbose mode unescaped whitespace in the pattern is ignored and no
# log line would match.  $remote_user is matched with \S+ so that requests
# with an authenticated user are parsed too, not only those logged as "-".
nginxLogPattern = re.compile(
    r"(%s) - \S+ (%s) (%s) (%s) (%s) (%s) (%s)"
    % (ipP, timeP, requestP, statusP, bodyBytesSentP, referP, userAgentP)
)

# One placeholder per column of the nine-column row built by parse_line().
sql = "INSERT INTO python.test VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s)"


def split_user_agent(userAgent):
    """Split a quoted user-agent string into three parts.

    Returns a ``(first_token, system, tail)`` tuple where ``first_token`` is
    the text before the first space, ``system`` is the first parenthesised
    group and ``tail`` is the second match of ``userlius``.  The surrounding
    double quotes of the log field are kept as they are.

    When the string is 20 characters or shorter, or cannot be split that way,
    the whole string is returned as the first element and the other two are
    empty strings.
    """
    if len(userAgent) > 20:
        systems = userSystems.findall(userAgent)
        tails = userlius.findall(userAgent)
        if systems and len(tails) > 1:
            return userAgent.split(' ')[0], systems[0], tails[1]
    return userAgent, "", ""


def parse_line(line):
    """Turn one access-log line into the nine-value row for the INSERT.

    Returns ``[ip, time, request, status, body_bytes_sent, referer,
    ua_first_token, ua_system, ua_tail]`` (all strings), or ``None`` when the
    line does not match ``nginxLogPattern``.
    """
    matched = nginxLogPattern.match(line)
    if matched is None:
        return None
    ip, time, request, status, bodyBytesSent, refer, userAgent = matched.groups()
    # Drop the leading "[" and the trailing 7 characters (UTC offset plus "]",
    # e.g. "-07:00]" or " +0800]") and turn an ISO-8601 "T" into a space.
    Time = time.replace('T', ' ')[1:-7]
    return [ip, Time, request, status, bodyBytesSent, refer] + list(
        split_user_agent(userAgent)
    )


def main():
    """Read LOG_PATH line by line and insert every parsed line into MySQL."""
    with open(LOG_PATH) as logfile:
        # Database connection information
        conn = MySQLdb.connect(host='192.168.1.22', user='test', passwd='pass', port=3306, db='python')
        try:
            cur = conn.cursor()
            for line in logfile:
                value = parse_line(line)
                if value is None:
                    # Lines that do not follow the expected format are skipped.
                    continue
                if value[7]:
                    # Progress output only for rows whose user agent was
                    # fully split (the system part is non-empty only then).
                    print(value[7])
                    print(value)
                try:
                    result = cur.execute(sql, value)
                    print(result)
                except MySQLdb.Error as e:
                    # A failed insert is reported and the run continues.
                    print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
            # Commit once, after every row has been handed to the server.
            conn.commit()
        finally:
            conn.close()


if __name__ == "__main__":
    main()
IV. The data stored in the database is as follows: