Python script to download and merge SAE logs


For some reason I needed the log files of a site hosted on SAE, which can only be downloaded from SAE one day at a time. Fortunately, SAE provides an API for fetching log file download addresses in bulk, so I wrote a Python script to download and merge these files automatically.

Call the API to get the download address

The documentation is located at http://sae.sina.com.cn/?m=devcenter&catId=281

Set your own application and download parameters

The following variables need to be set for the request:


api_url = 'http://dloadcenter.sae.sina.com.cn/interapi.php?'
appname = 'xxxxx'
from_date = '20140101'
to_date = '20140116'
url_type = 'http' # http|taskqueue|cron|mail|rdc
url_type2 = 'access' # only when type=http  access|debug|error|warning|notice|resources
secret_key = 'xxxxx'

Generate the request address

The official documentation describes how the request address must be generated:

1. Sort the parameters by key
2. Concatenate them into the request string, then strip the & characters
3. Append the secret_key
4. MD5 the resulting string to form the sign
5. Append the sign to the request string
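For concreteness, here is a small worked example of this scheme (the parameter values and the demo_secret key are made up for illustration):


import hashlib

# hypothetical values, for illustration only
secret_key = 'demo_secret'
# sorted parameters, concatenated with & (note the trailing &)
request = 'act=log&appname=demo&from=20140101&to=20140116&type=http&type2=access&'

# strip the &s, append the secret key, and md5 the result to form the sign
sign = hashlib.md5(request.replace('&', '') + secret_key).hexdigest()

print request + 'sign=' + sign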

The specific implementation code is as follows


params = dict()
params['act'] = 'log'
params['appname'] = appname
params['from'] = from_date
params['to'] = to_date
params['type'] = url_type
if url_type == 'http':
    params['type2'] = url_type2

# sort the parameters and build the request string
params = collections.OrderedDict(sorted(params.items()))
request = ''
for k, v in params.iteritems():
    request += k + '=' + v + '&'

# strip the &s, append the secret key, and md5 to form the sign
sign = request.replace('&', '') + secret_key
md5 = hashlib.md5()
md5.update(sign)
sign = md5.hexdigest()
request = api_url + request + 'sign=' + sign

# call the api and check for errors
response = urllib2.urlopen(request).read()
response = json.loads(response)
if response['errno'] != 0:
    print '[!] ' + response['errmsg']
    exit()
print '[#] request success'
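Note that the sorted OrderedDict is what keeps the concatenation in sorted order: a plain dict in Python 2 does not iterate in sorted order, so the server's signature check, which expects the parameters sorted, would fail.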

Download the log files

SAE packages each day's logs into a tar.gz archive; the script downloads and saves each one, naming it after its date as date.tar.gz.


log_files = list()

for down_url in response['data']:
    # the url contains the date (e.g. 2014-01-16); use it as the file name
    file_name = re.compile(r'\d{4}-\d{2}-\d{2}').findall(down_url)[0] + '.tar.gz'
    log_files.append(file_name)
    data = urllib2.urlopen(down_url).read()
    with open(file_name, "wb") as f:
        f.write(data)

print '[#] you got %d log files' % len(log_files)
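A day's archive can be large, so as a minimal variation (not in the original) the response can be streamed to disk in chunks instead of being read into memory all at once:


import shutil
import urllib2

def download(down_url, file_name):
    # stream the response to disk in chunks (copyfileobj copies 16 KB at a time by default)
    response = urllib2.urlopen(down_url)
    with open(file_name, 'wb') as f:
        shutil.copyfileobj(response, f)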

Merge files

To merge the files, use the tarfile library to extract each archive and append its contents to access_log.


# merge these files into access_log
access_log = open('access_log', 'w')

for log_file in log_files:
    tar = tarfile.open(log_file)
    log_name = tar.getnames()[0]
    tar.extract(log_name)
    # append the extracted file's contents, then remove it
    data = open(log_name).read()
    access_log.write(data)
    os.remove(log_name)

access_log.close()
print '[#] all files have been written to access_log'
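As an alternative sketch (not in the original), tarfile.extractfile() can read a member directly from the archive, which avoids writing each extracted file to disk only to delete it again:


import tarfile

def append_logs(log_files, out_path='access_log'):
    with open(out_path, 'w') as access_log:
        for log_file in log_files:
            tar = tarfile.open(log_file)
            for member in tar.getmembers():
                # read regular-file members straight out of the archive
                if member.isfile():
                    access_log.write(tar.extractfile(member).read())
            tar.close()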

The complete code


#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author: Su Yan <http://yansu.org>
# @Date:   2014-01-17 12:05:19
# @Last Modified by:   Su Yan
# @Last Modified time: 2014-01-17 14:15:41

import os
import collections
import hashlib
import urllib2
import json
import re
import tarfile

# settings
# documents http://sae.sina.com.cn/?m=devcenter&catId=281
api_url = 'http://dloadcenter.sae.sina.com.cn/interapi.php?'
appname = 'yansublog'
from_date = '20140101'
to_date = '20140116'
url_type = 'http' # http|taskqueue|cron|mail|rdc
url_type2 = 'access' # only when type=http  access|debug|error|warning|notice|resources
secret_key = 'zwzim4zhk35i50003kz2lh3hyilz01m03515j0i5'

# encode request
params = dict()
params['act'] = 'log'
params['appname'] = appname
params['from'] = from_date
params['to'] = to_date
params['type'] = url_type
if url_type == 'http':
    params['type2'] = url_type2

# sort the parameters and build the request string
params = collections.OrderedDict(sorted(params.items()))
request = ''
for k, v in params.iteritems():
    request += k + '=' + v + '&'

# strip the &s, append the secret key, and md5 to form the sign
sign = request.replace('&', '') + secret_key
md5 = hashlib.md5()
md5.update(sign)
sign = md5.hexdigest()
request = api_url + request + 'sign=' + sign

# request api
response = urllib2.urlopen(request).read()
response = json.loads(response)
if response['errno'] != 0:
    print '[!] ' + response['errmsg']
    exit()
print '[#] request success'

# download and save files
log_files = list()
for down_url in response['data']:
    # the url contains the date (e.g. 2014-01-16); use it as the file name
    file_name = re.compile(r'\d{4}-\d{2}-\d{2}').findall(down_url)[0] + '.tar.gz'
    log_files.append(file_name)
    data = urllib2.urlopen(down_url).read()
    with open(file_name, "wb") as f:
        f.write(data)
print '[#] you got %d log files' % len(log_files)

# merge these files into access_log
access_log = open('access_log', 'w')
for log_file in log_files:
    tar = tarfile.open(log_file)
    log_name = tar.getnames()[0]
    tar.extract(log_name)
    # append the extracted file's contents, then remove it
    data = open(log_name).read()
    access_log.write(data)
    os.remove(log_name)
access_log.close()
print '[#] all files have been written to access_log'
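Note that the script targets Python 2 (urllib2, dict.iteritems, and print statements); run it with a Python 2 interpreter, and access_log will appear in the working directory alongside the downloaded date.tar.gz archives.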

