Python multithreaded HTTP download implementation example
- 2020-04-02 13:20:56
- OfStack
The test platform Ubuntu 13.04 X86_64 Python 2.7.4
It took nearly two hours; the main problem was that at first I didn't think of passing a single shared file object into each thread, so the downloaded file's MD5 differed from the source file's, which wasted a lot of time.
Anyone interested can take it and add parameters, improve it, or add support for resuming interrupted downloads.
# -*- coding: utf-8 -*-
# Author: ToughGuy
# Email: wj0630@gmail.com
# I wrote this to get a feel for Python's multithreading mechanism.
# I don't usually write comments; taking the time to comment this code also
# helps me catch problems, since I may not have fully understood it myself.
# The test platform Ubuntu 13.04 X86_64 Python 2.7.4
import threading
import urllib2
import sys
# Upper bound on the number of concurrent download threads.
max_thread = 10
# Reentrant lock used to serialize console output and the shared file
# object's seek/write pairs across all worker threads.
lock = threading.RLock()
class Downloader(threading.Thread):
def __init__(self, url, start_size, end_size, fobj, buffer):
self.url = url
self.buffer = buffer
self.start_size = start_size
self.end_size = end_size
self.fobj = fobj
threading.Thread.__init__(self)
def run(self):
"""
Just a waistcoat
"""
with lock:
print 'starting: %s' % self.getName()
self._download()
def _download(self):
"""
I'm the one who moved the bricks
"""
req = urllib2.Request(self.url)
# add HTTP Header(RANGE) Set the scope of the data to be downloaded
req.headers['Range'] = 'bytes=%s-%s' % (self.start_size, self.end_size)
f = urllib2.urlopen(req)
# Initializes the current thread file object offset
offset = self.start_size
while 1:
block = f.read(self.buffer)
# The current thread exits after data acquisition
if not block:
with lock:
print '%s done.' % self.getName()
break
# Of course, threads must be locked when writing data
# use with lock Alternative to traditional lock.acquire().....lock.release()
# Need to be python >= 2.5
with lock:
sys.stdout.write('%s saveing block...' % self.getName())
# Sets the offset address of the file object
self.fobj.seek(offset)
# Writes the retrieved data
self.fobj.write(block)
offset = offset + len(block)
sys.stdout.write('done.n')
def main(url, thread=3, save_file='', buffer=1024):
# The maximum number of threads cannot be exceeded max_thread
thread = thread if thread <= max_thread else max_thread
# Gets the size of the file
req = urllib2.urlopen(url)
size = int(req.info().getheaders('Content-Length')[0])
# Initializes the file object
fobj = open(save_file, 'wb')
# Based on the number of threads Each thread is responsible for http Range The size of the
avg_size, pad_size = divmod(size, thread)
plist = []
for i in xrange(thread):
start_size = i*avg_size
end_size = start_size + avg_size - 1
if i == thread - 1:
# The last thread plus pad_size
end_size = end_size + pad_size + 1
t = Downloader(url, start_size, end_size, fobj, buffer)
plist.append(t)
# Start to move the brick
for t in plist:
t.start()
# Wait for all threads to finish
for t in plist:
t.join()
# End of course remember to close the file object
fobj.close()
print 'Download completed!'
if __name__ == '__main__':
    # Example usage against a LAN test server.
    # NOTE(review): the URL names a .zip but save_file ends in .iso --
    # presumably a leftover from testing; confirm the intended filename.
    url = 'http://192.168.1.2:8082/downloads/10M.zip'
    main(url=url, thread=10, save_file='test.iso', buffer=4096)