Python multithreaded HTTP download implementation example

  • 2020-04-02 13:20:56
  • OfStack

Test platform: Ubuntu 13.04 x86_64, Python 2.7.4

This took nearly two hours. The main problem was that at first I didn't think of passing a single shared file object into the threads, so the MD5 of the downloaded file didn't match the source file, which wasted a lot of time. A quick way to catch that kind of corruption is to compare checksums, as sketched below.
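A minimal checksum sketch using the standard hashlib module; the two file names are placeholders for the downloaded copy and a local copy of the source file:

import hashlib

def md5sum(path, buffer=4096):
    # Read the file in chunks so large files don't blow up memory
    m = hashlib.md5()
    with open(path, 'rb') as f:
        while 1:
            block = f.read(buffer)
            if not block:
                break
            m.update(block)
    return m.hexdigest()

# 'test.iso' and 'source.iso' are placeholder names
print md5sum('test.iso') == md5sum('source.iso')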

If you're interested, feel free to take it, add parameters, improve it, or add support for resuming interrupted downloads; a rough resume sketch follows below.
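One rough way to resume an interrupted download is to check how many bytes are already on disk and request only the remaining range. The single-threaded sketch below is just an assumption about how that could be bolted on; the function name and arguments are hypothetical and not part of the original script:

import os
import urllib2

def resume_download(url, save_file, buffer=4096):
    # Start from however many bytes are already on disk
    offset = os.path.getsize(save_file) if os.path.exists(save_file) else 0
    req = urllib2.Request(url)
    # Ask the server for everything from 'offset' to the end of the file
    req.headers['Range'] = 'bytes=%s-' % offset
    # Note: if the file is already complete the server may answer 416
    # (Requested Range Not Satisfiable); a real implementation should handle that
    f = urllib2.urlopen(req)
    with open(save_file, 'ab') as fobj:
        while 1:
            block = f.read(buffer)
            if not block:
                break
            fobj.write(block)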


# -*- coding: utf-8 -*-
# Author: ToughGuy
# Email: wj0630@gmail.com
# I wrote this to get a feel for Python's threading mechanism.
# I don't usually write comments; taking the time to comment this code is also
# a good way to catch mistakes, since I may not have fully understood it myself.
# Test platform: Ubuntu 13.04 x86_64, Python 2.7.4
import threading
import urllib2
import sys
max_thread = 10
#  Initialize the lock 
lock = threading.RLock()
class Downloader(threading.Thread):
    def __init__(self, url, start_size, end_size, fobj, buffer):
        self.url = url
        self.buffer = buffer
        self.start_size = start_size
        self.end_size = end_size
        self.fobj = fobj
        threading.Thread.__init__(self)
    def run(self):
        """
        Just a thin wrapper around _download().
        """
        with lock:
            print 'starting: %s' % self.getName()
        self._download()
    def _download(self):
        """
        This is where the actual downloading work happens.
        """
        req = urllib2.Request(self.url)
        # Add the HTTP Range header to set the byte range this thread downloads
        req.headers['Range'] = 'bytes=%s-%s' % (self.start_size, self.end_size)
        f = urllib2.urlopen(req)
        # Initialize this thread's write offset into the shared file object
        offset = self.start_size
        while 1:
            block = f.read(self.buffer)
            # Exit the thread once all of its data has been received
            if not block:
                with lock:
                    print '%s done.' % self.getName()
                break
            # Writing to the shared file object must be protected by the lock
            # 'with lock' replaces the traditional lock.acquire() ... lock.release()
            # Requires Python >= 2.5
            with lock:
                sys.stdout.write('%s saving block...' % self.getName())
                # Seek to this thread's current offset in the shared file object
                self.fobj.seek(offset)
                # Write the block that was just read
                self.fobj.write(block)
                offset = offset + len(block)
                sys.stdout.write('done.\n')

def main(url, thread=3, save_file='', buffer=1024):
    # Cap the number of threads at max_thread
    thread = thread if thread <= max_thread else max_thread
    # Get the total file size from the Content-Length header
    req = urllib2.urlopen(url)
    size = int(req.info().getheaders('Content-Length')[0])
    #  Initializes the file object 
    fobj = open(save_file, 'wb')
    # Compute the size of the HTTP Range each thread is responsible for
    avg_size, pad_size = divmod(size, thread)
    plist = []
    for i in xrange(thread):
        start_size = i*avg_size
        end_size = start_size + avg_size - 1
        if i == thread - 1:
            # The last thread also takes the remaining pad_size bytes
            end_size = size - 1
        t = Downloader(url, start_size, end_size, fobj, buffer)
        plist.append(t)
    # Start the worker threads
    for t in plist:
        t.start()
    #  Wait for all threads to finish 
    for t in plist:
        t.join()
    # Remember to close the file object when everything is done
    fobj.close()
    print 'Download completed!'
if __name__ == '__main__':
    url = 'http://192.168.1.2:8082/downloads/10M.zip'
    main(url=url, thread=10, save_file='test.iso', buffer=4096)
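
Note that splitting the file across threads only works when the server honors Range requests. As a rough heuristic (some servers accept ranges without advertising it), you could check the Accept-Ranges header before calling main(); the helper below is an assumption, not part of the original script:

import urllib2

def supports_ranges(url):
    # Heuristic: the server advertises byte-range support via Accept-Ranges
    req = urllib2.urlopen(url)
    return req.info().getheader('Accept-Ranges') == 'bytes'

# Example usage:
# thread = 10 if supports_ranges(url) else 1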

