Parsing weather data from the China Weather Network with Python

  • 2020-04-02 13:31:48
  • OfStack

Usage: run the following command in a terminal:

python weather.py http://www.weather.com.cn/weather/101010100.shtml

The script prints six days of Beijing weather data in JSON format.

#coding=utf-8  
#weather.py  
import urllib  
import re  
import simplejson  
import sys  

# The script takes exactly one command-line argument: the URL of the page to
# fetch.  Anything else prints a usage hint and stops.
if len(sys.argv) != 2:
    usage = 'please enter: python ' + sys.argv[0] + '  <url>'
    print(usage)
    exit(0)
url = sys.argv[1]
def readurlPageContent(url):
    """Download *url* and return the page as one whitespace-trimmed string.

    The page is read line by line; leading and trailing whitespace (including
    the line break) is stripped from every line and the results are
    concatenated without a separator.

    Args:
        url: Address of the page to fetch, passed to ``urllib.urlopen``.

    Returns:
        The concatenated, stripped lines of the response body.
    """
    webpage = urllib.urlopen(url)
    try:
        # Collect the pieces and join once instead of growing a string.
        pieces = []
        line = webpage.readline()
        while line:
            pieces.append(line.strip())
            line = webpage.readline()
    finally:
        # Always release the connection, even if reading fails part-way.
        webpage.close()
    return ''.join(pieces)

def getDatabody(data):
    """Return the first ``weatherYubaoBox`` ``<div>`` element found in *data*.

    The match runs from the opening ``<div class="weatherYubaoBox">`` tag to
    the first ``</div>``, and is rejected if another ``<div`` occurs in
    between.  Matching is case-insensitive.

    Args:
        data: Page markup as a single string.

    Returns:
        The matched markup including its enclosing ``<div>`` tags, or ``None``
        when no such element is present.
    """
    # The flag has to be supplied at compile time.  A compiled pattern's
    # findall()/search() take (string, pos, endpos), so passing re.I there
    # merely started the scan at index 2 and left matching case-sensitive.
    reg = re.compile(
        r'(<div class="weatherYubaoBox">(?:(?!<div)(?!</div).)*</div>)',
        re.I)
    match = reg.search(data)
    if match is None:
        return None
    return match.group(1)

def getSixDayWeather(data):
    """Extract the forecast text items from the weather box markup.

    Every non-empty run of text sitting between a ``>`` and the next ``<`` is
    collected; runs containing ``<``, ``>`` or the substring ``var`` are not
    matched.

    Args:
        data: Markup of the weather box as a single string.

    Returns:
        The collected items without the first 12 and the last 7 entries, or
        ``None`` when 12 or fewer items were found.
    """
    text_pattern = re.compile(r'>((?:(?!<)(?!>)(?!var).)+)<')
    items = text_pattern.findall(data)
    if len(items) <= 12:
        return None
    # Drop the 12 leading and 7 trailing items.  According to the original
    # author's note, what remains on the live page is 78 items: 13 pieces of
    # information for each of the 6 forecast days.
    return items[12:-7]

  
# Download the page, isolate the forecast box and pull out its text items.
# Each stage reports its own failure message and stops the script.
data = readurlPageContent(url)
match_data = getDatabody(data)
if match_data is None:
    print('get weather data fail')
    sys.exit(0)

weathers_data = getSixDayWeather(match_data)
if weathers_data is None:
    print('get six day info fail')
    sys.exit(0)

# Split the flat item list into six equally sized per-day groups.
count = len(weathers_data)
groups_item_count = count // 6
if 0 < count < 6:
    # With one to five items the group size is zero and the grouping below
    # would divide by zero, so report it as a failed extraction instead.
    # An empty list is left alone and still prints "{}".
    print('get six day info fail')
    sys.exit(0)

# Items are assigned to 'day1', 'day2', ... by position.  If the item count
# is not a multiple of six, the leftover items land in entries past 'day6'.
weathers = {}
for start, item in enumerate(weathers_data):
    day_key = 'day' + str(start // groups_item_count + 1)
    weathers.setdefault(day_key, []).append(item)

print(simplejson.dumps(weathers, encoding='UTF-8', ensure_ascii=False))

Note: this script uses the third-party JSON library simplejson. To install it:
1. Download the compressed archive from http://pypi.python.org/pypi/simplejson/
2. Unzip it (for example, right-click the archive and extract it) to a directory such as D:/simplejson
3. Open a command prompt: Start -> Run, then enter: CMD
4. Change to that directory (e.g. D:/simplejson):

    cd D:/simplejson

5. Run the installation script: setup.py install


Related articles: