Downloading images from a subscription feed with Python
- 2020-04-02 14:40:05
- OfStack
This article demonstrates, with a worked example, how to download images from a subscription feed in Python. It is shared here for your reference; the details are as follows:
This code targets Python 3.4, which differs significantly from Python 2.x.
This is an exercise whose data source is a NetEase subscription feed. The code is as follows:
__author__ = 'Saint'
import json
import os
import urllib.error
import urllib.request
from html.parser import HTMLParser
# Extract the image URLs from the retrieved web page content
class MyHtmlParser(HTMLParser):
    """Collect the ``src`` attribute of every ``<img>`` tag fed to the parser.

    After ``feed()`` has been called, ``links`` holds the collected values in
    document order.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance list. A class-level list would be shared by every
        # parser object, so links found on one page would still be present
        # when the next page is parsed.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` value when the start tag is an ``<img>``.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag != "img":
            return
        for name, value in attrs:
            # A bare attribute (``<img src>``) is reported with value None;
            # there is nothing to download in that case.
            if name == "src" and value:
                self.links.append(value)
class Down(object):
    """Interactive downloader for images referenced by subscription feeds.

    For each address in ``collect_links`` the article list is fetched as
    JSON, every listed article page is downloaded, and each ``<img src>``
    found on the page is saved below ``img_path`` in a folder named after the
    last path segment of the feed address.
    """

    # The general catalog: root folder under which one folder per feed is made
    img_path = "E:/saint"
    # Download directory of the feed being processed (set by isDirExists)
    dir = ''
    # Acquisition source address
    collect_links = [
        "http://dy.163.com/v2/media/articlelist/T1374483113516-1",
        "http://dy.163.com/v2/media/articlelist/T1420776257254-1",
        "http://dy.163.com/v2/media/articlelist/T1376641060407-1",
    ]
    # Base address of an article page; "/<tid>/<docid>" is appended to it
    img_links = "http://dy.163.com/v2/article"

    @staticmethod
    def _clear_screen():
        """Clear the terminal with the command that exists on this platform."""
        os.system("cls" if os.name == "nt" else "clear")

    def handleCollect(self):
        """Process every feed in ``collect_links``, prompting on stdin.

        When a feed's article list cannot be obtained the user is asked
        whether to go on: "y" moves to the next feed, anything else stops.
        After each downloaded article the user may answer "n" to stop working
        on the current feed.
        """
        # NOTE(review): the listing this was taken from had lost its
        # indentation. The nesting used here (continue-prompt inside the
        # per-article loop, "OK" printed once after all feeds) is a
        # reconstruction -- confirm against the original script.
        for collect_link in self.collect_links:
            notice = " Starting from the [" + collect_link + "] Collect pictures "
            print(notice)
            # Create the downloaded directory
            dir_name = collect_link.split("/")[-1]
            self.isDirExists(dir_name)
            pages = self.getListFromSubscribe(collect_link)
            if pages is False:
                print(" Data acquisition failed, whether to continue (y/n)")
                op = input()
                if op == "y":
                    self._clear_screen()
                elif op == "n":
                    print(" Stop to collect ")
                    break
                else:
                    self._clear_screen()
                    print(" Illegal input ")
                    break
            else:
                for page in pages:
                    page_uri = self.img_links + "/" + page["tid"] + "/" + page["docid"]
                    self.getImgFromUri(page_uri)
                    print(" Whether or not to continue (y/n)")
                    new_op = input()
                    if new_op == "n":
                        self._clear_screen()
                        print(" Acquisition is ")
                        break
        print("OK")

    # Get the directory from the subscription
    def getListFromSubscribe(self, uri):
        """Fetch the article list of one feed.

        Args:
            uri: Address of the feed's JSON article list.

        Returns:
            The ``data`` member of the JSON reply, or ``False`` when the
            request fails, the HTTP status is not 2xx, or the reply's
            ``code`` member is not 1.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                if res.code < 200 or res.code >= 300:
                    self._clear_screen()
                    return False
                raw = res.read()
        except urllib.error.URLError as err:
            # urlopen raises for unreachable hosts and HTTP error statuses;
            # report it through the same False result the caller handles.
            print(err)
            return False
        # read() returns bytes; decode with the page encoding before parsing
        reply = json.loads(raw.decode("gbk"))
        if reply['code'] != 1:
            print(reply['msg'])
            return False
        return reply['data']

    # Get the pages of this subscription and extract the pictures you need from them
    def getImgFromUri(self, uri):
        """Download one article page and save every image it references.

        Args:
            uri: Address of the article page.
        """
        with urllib.request.urlopen(uri) as res:
            html_code = res.read().decode("gbk")
        hp = MyHtmlParser()
        # Start from an empty per-instance list so links collected from
        # previously parsed pages are never downloaded again.
        hp.links = []
        hp.feed(html_code)
        hp.close()
        for link in hp.links:  # hp.links is the list of image addresses
            self.writeToDisk(link)

    # Check if the file directory exists, and if it does not, create it
    def isDirExists(self, dir_name):
        """Make ``<img_path>/<dir_name>`` the current download directory.

        The directory is created when missing and stored in ``self.dir``.

        Args:
            dir_name: Folder name to create below ``img_path``.

        Returns:
            Always ``True``.
        """
        # join() inserts the separator that plain concatenation left out
        self.dir = os.path.join(self.img_path, dir_name)
        os.makedirs(self.dir, exist_ok=True)
        return True

    # Download the file and write to disk
    def writeToDisk(self, url):
        """Download ``url`` and store it in ``self.dir``.

        The file is named after the last "/"-separated segment of ``url``.

        Args:
            url: Address of the file to download.

        Returns:
            Always ``True``.
        """
        file_name = url.split("/")[-1]
        with urllib.request.urlopen(url) as res:
            payload = res.read()
        # Write through an explicit path rather than os.chdir(): the process
        # working directory stays untouched and a relative img_path keeps
        # resolving to the same place for every feed.
        with open(os.path.join(self.dir, file_name), "wb") as out:
            out.write(payload)
        return True
if __name__ == "__main__":
    # Start the interactive download session only when run as a script.
    Down().handleCollect()
I hope this article has helped you with your Python programming.