Downloading images from a subscription feed with Python

  • 2020-04-02 14:40:05
  • OfStack

This article shows a Python implementation for downloading images from a subscription feed. It is shared here for your reference. The details are as follows:

This code targets Python 3.4, which differs considerably from Python 2.x.
It is an exercise whose data source is a NetEase subscription feed. The code is as follows:

__author__ = 'Saint'
import os
import urllib.request
import json
from html.parser import HTMLParser
# Extract the image addresses (<img src="...">) from the retrieved web content
class MyHtmlParser(HTMLParser):
    """HTML parser that records the ``src`` of every ``<img>`` tag it is fed.

    After ``feed()`` has been called, ``links`` holds the collected ``src``
    values in document order.
    """

    def __init__(self, *args, **kwargs):
        """Create a parser with its own, initially empty, ``links`` list.

        All arguments are passed through unchanged to ``HTMLParser``.
        """
        super().__init__(*args, **kwargs)
        # Per-instance list: a class-level list would be shared by every
        # parser, so links from earlier pages would leak into later ones.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Collect the ``src`` attribute of ``<img>`` tags.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        if tag != "img":
            return
        for name, value in attrs:
            # ``value`` is None for a bare ``src`` attribute; skip it and
            # empty strings because there is nothing to download.
            if name == "src" and value:
                self.links.append(value)
class Down(object):
    """Interactive downloader for images in NetEase subscription articles.

    For each address in ``collect_links`` the article list is fetched as JSON,
    every listed article page is downloaded, and each image referenced by an
    ``<img src=...>`` tag on that page is saved below ``img_path``.
    """

    # The general catalog: root folder below which one folder per feed is made
    img_path = "E:/saint"
    # Download directory of the feed being processed (set by isDirExists)
    dir = ''
    # Acquisition source addresses
    collect_links = ["http://dy.163.com/v2/media/articlelist/T1374483113516-1", "http://dy.163.com/v2/media/articlelist/T1420776257254-1", "http://dy.163.com/v2/media/articlelist/T1376641060407-1"]
    # Base address of the article pages
    img_links = "http://dy.163.com/v2/article"

    @staticmethod
    def _clear_screen():
        """Clear the console with the command that exists on this platform."""
        os.system("cls" if os.name == "nt" else "clear")

    def handleCollect(self):
        """Walk through every feed, prompting the user between steps.

        When a feed cannot be read the user is asked whether to go on: "y"
        moves to the next feed, anything else stops the run.  After each
        article the user may answer "n" to leave the current feed.
        """
        for collect_link in self.collect_links:
            print(" Starting from the [" + collect_link + "] Collect pictures ")
            # Create the download directory, named after the last URL segment
            self.isDirExists(collect_link.split("/")[-1])
            pages = self.getListFromSubscribe(collect_link)
            # Identity check: an empty article list is a success, not a failure
            if pages is False:
                print(" Data acquisition failed, whether to continue (y/n)")
                op = input()
                if op == "y":
                    self._clear_screen()
                    continue
                if op == "n":
                    print(" Stop to collect ")
                    break
                self._clear_screen()
                print(" Illegal input ")
                break
            for page in pages:
                page_uri = self.img_links + "/" + page["tid"] + "/" + page["docid"]
                self.getImgFromUri(page_uri)
                print(" Whether or not to continue (y/n)")
                if input() == "n":
                    self._clear_screen()
                    print(" Acquisition is ")
                    break
        print("OK")

    # Get the directory from the subscription
    def getListFromSubscribe(self, uri):
        """Fetch the article list of one feed.

        Args:
            uri: Address of the feed's article-list endpoint.

        Returns:
            The value stored under ``"data"`` in the JSON reply, or ``False``
            when the request fails, the reply cannot be decoded or parsed, or
            the reply's ``"code"`` is not 1.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                if res.code < 200 or res.code >= 300:
                    self._clear_screen()
                    return False
                # read() returns bytes; the endpoint is decoded as GBK
                result = res.read().decode("gbk")
            payload = json.loads(result)
        except (OSError, ValueError) as err:
            # OSError covers URLError/HTTPError and socket failures;
            # ValueError covers undecodable bytes and malformed JSON.
            print(err)
            return False
        if payload.get("code") != 1:
            print(payload.get("msg"))
            return False
        return payload.get("data", False)

    # Get the pages of this subscription and extract the pictures you need from them
    def getImgFromUri(self, uri):
        """Download every image referenced on one article page.

        Args:
            uri: Address of the article page.
        """
        from urllib.parse import urljoin

        with urllib.request.urlopen(uri) as res:
            # Only the src attributes are needed, so tolerate stray bytes
            html_code = res.read().decode("gbk", errors="replace")
        hp = MyHtmlParser()
        # Give this parser its own list so that links gathered from earlier
        # pages are never carried over and downloaded again.
        hp.links = []
        hp.feed(html_code)
        hp.close()

        for link in hp.links:  # hp.links is the list of image addresses
            if not link:
                continue
            # Resolve relative and protocol-relative addresses against the page
            self.writeToDisk(urljoin(uri, link))

    # Check if the file directory exists, and if it does not, create it
    def isDirExists(self, dir_name):
        """Point ``self.dir`` at ``img_path/dir_name`` and make sure it exists.

        Args:
            dir_name: Name of the folder to use below ``img_path``.

        Returns:
            Always ``True``.
        """
        self.dir = os.path.join(self.img_path, dir_name)
        os.makedirs(self.dir, exist_ok=True)
        return True

    # Download the file and write to disk
    def writeToDisk(self, url):
        """Download ``url`` and store it in ``self.dir``.

        The file name is the last segment of the URL path, without any query
        string.  The process working directory is left untouched.

        Args:
            url: Absolute address of the file to download.

        Returns:
            ``True`` once the file is written, ``False`` when the URL path
            does not end in a file name.
        """
        from urllib.parse import urlsplit

        file_name = urlsplit(url).path.split("/")[-1]
        if not file_name:
            return False
        with urllib.request.urlopen(url) as res:
            content = res.read()
        with open(os.path.join(self.dir, file_name), "wb") as out:
            out.write(content)
        return True
if __name__ == "__main__":
    # Start the interactive collection only when run as a script.
    Down().handleCollect()

I hope this article has helped you with your Python programming.


Related articles: