Example Python script for full and incremental backups

  • 2020-08-22 22:13:01
  • OfStack

Requirements:

There are two folders, dst and src, under /root/backup. A full backup is required on Mondays and an incremental backup on every other day. Back up from src to dst.

Ideas and Key points:

Build a dictionary that maps each file name in src to that file's md5 value, and store the dictionary in a file.

For a full backup, write the file names and md5 values to a single file using cPickle.

When doing an incremental backup, check whether each file name is among the stored keys. If it is not, back the file up; if it is, back the file up only when its md5 value has changed.

os.path.join() for joining paths, os.listdir(), os.chdir()

time.strftime() to determine the day of the week

cPickle, which can losslessly record Python variables of any type. File operations.

Using tarfile to package files

hashlib is used to calculate a file's md5 value. Take care not to read the whole file at once; read it 4 KB at a time so that a large file does not exhaust memory.

Opening a file with "with file(...)" means there is no need to call f.close() explicitly.


#!/usr/bin/env python
import time
import os
import cPickle as p
import tarfile
import hashlib
# Backup layout: srcDir and dstDir are folder names located under baseDir.
baseDir = '/root/backup'
srcDir, dstDir = 'src', 'dst'
# File name of the pickled {file name: md5} record.
md5file = 'md5.data'
# Archive names embed the source folder name and the current date (YYYYMMDD).
fullName = "full_%s_%s.tar.gz" % (srcDir, time.strftime('%Y%m%d'))
incrName = "incr_%s_%s.tar.gz" % (srcDir, time.strftime('%Y%m%d'))
def md5sum(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is read in 4096-byte chunks so that hashing a large file does
    not load its whole content into memory at once.

    Args:
        fname: Path of the file to hash.

    Returns:
        The MD5 digest as a 32-character hexadecimal string.

    Raises:
        IOError: If the file cannot be opened or read (OSError on Python 3).
    """
    m = hashlib.md5()
    # Binary mode so the digest covers the exact bytes on disk, with no
    # newline translation or text decoding.  open() replaces the
    # Python-2-only file() builtin and behaves the same there.
    with open(fname, 'rb') as f:
        # iter() with a sentinel keeps calling read() until it returns b''.
        for data in iter(lambda: f.read(4096), b''):
            m.update(data)
    return m.hexdigest()
def fullBackup():
    """Archive the whole source folder and record an MD5 baseline.

    Adds srcDir (recursively) to the gzip-compressed tar file
    baseDir/dstDir/fullName, then pickles a {file name: md5} dictionary of
    the regular files found directly inside the source folder to
    baseDir/dstDir/md5file.

    Side effects:
        Changes the process working directory to baseDir.

    Raises:
        IOError / OSError: If the source or destination cannot be read or
            written.
    """
    srcPath = os.path.join(baseDir, srcDir)
    md5Dict = {}
    for eachFile in os.listdir(srcPath):
        filePath = os.path.join(srcPath, eachFile)
        # Only regular files can be hashed; md5sum() would raise on a
        # sub-directory.  Sub-directories still go into the archive below.
        if os.path.isfile(filePath):
            md5Dict[eachFile] = md5sum(filePath)
    # Work from baseDir so archive members are stored under the relative
    # name srcDir instead of an absolute path.
    os.chdir(baseDir)
    # The with-block closes the archive even when tar.add() raises.
    with tarfile.open(os.path.join(baseDir, dstDir, fullName), 'w:gz') as tar:
        tar.add(srcDir)
    # Write the baseline only after the archive succeeded, so a failed
    # backup never leaves a record claiming the files were saved.
    with open(os.path.join(baseDir, dstDir, md5file), 'wb') as f:
        p.dump(md5Dict, f)
def incrBackup():
    """Archive the files that are new or changed since the last recorded run.

    Hashes every regular file directly inside the source folder and compares
    the result with the {file name: md5} dictionary stored in
    baseDir/dstDir/md5file.  Files missing from that record, or whose md5
    differs, are added to the gzip-compressed tar file
    baseDir/dstDir/incrName.  The record is then replaced with the new
    hashes.

    If no record exists yet, every file is treated as new.  Sub-directories
    of the source folder are not examined.

    Side effects:
        Changes the process working directory to baseDir.

    Raises:
        IOError / OSError: If the source or destination cannot be read or
            written.
    """
    srcPath = os.path.join(baseDir, srcDir)
    md5Path = os.path.join(baseDir, dstDir, md5file)
    newmd5 = {}
    for eachFile in os.listdir(srcPath):
        filePath = os.path.join(srcPath, eachFile)
        # Only regular files can be hashed; md5sum() would raise on a
        # sub-directory.
        if os.path.isfile(filePath):
            newmd5[eachFile] = md5sum(filePath)
    if os.path.isfile(md5Path):
        # NOTE(security): unpickling can execute arbitrary code.  This is
        # acceptable only while the record file is written solely by this
        # script and the destination folder is not writable by others.
        with open(md5Path, 'rb') as f:
            storedmd5 = p.load(f)
    else:
        # No baseline yet (no earlier backup has run): back everything up.
        storedmd5 = {}
    # Work from baseDir so archive members are stored under relative names.
    os.chdir(baseDir)
    # The with-block closes the archive even when tar.add() raises.
    with tarfile.open(os.path.join(baseDir, dstDir, incrName), 'w:gz') as tar:
        for eachKey in newmd5:
            # get() yields None for an unknown file, which never equals a
            # digest string, so new files are picked up as well.
            if storedmd5.get(eachKey) != newmd5[eachKey]:
                tar.add(os.path.join(srcDir, eachKey))
    # Update the record only after the archive was written successfully.
    with open(md5Path, 'wb') as f:
        p.dump(newmd5, f)
def main():
    """Run a full backup on Mondays and an incremental backup otherwise."""
    # tm_wday is 0 for Monday.  Unlike comparing strftime('%a') with 'Mon',
    # this does not depend on the process locale.
    if time.localtime().tm_wday == 0:
        fullBackup()
    else:
        incrBackup()
# Run the backup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
~  

Related articles: