Example of Python crawler crawling NBA data function
- 2020-10-07 18:45:18
- OfStack
This article presents a Python example that crawls NBA statistics. It is shared for your reference; the details are as follows:
The crawled site is stat-nba.com; the script fetches NBA data from the start of the 2016-2017 regular season through January 7, 2017.
Change url_header and url_tail to crawl specific additional data.
The source code is as follows:
#coding=utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import requests
import time
import urllib
from bs4 import BeautifulSoup
import re
from pyExcelerator import *
def getURLLists(url_header, url_tail, pages):
    """Build the list of result-page URLs to crawl.

    Parameters:
        url_header: common URL prefix (everything before the page number).
        url_tail: common URL suffix (everything after the page number).
        pages: index of the last page; pages 0..pages inclusive are generated.

    Returns:
        A list of pages + 1 full URLs, starting with page 0.
    """
    # Page 0 is always included (the original special-cased it), so clamp
    # a negative `pages` to 0 instead of returning an empty list.
    url_lists = [url_header + str(i) + url_tail for i in range(max(pages, 0) + 1)]
    # Log the first URL so the crawl target can be eyeballed.
    print(url_lists[0])
    return url_lists
def getNBAAllData(url_lists):
    """Fetch and concatenate the regular-season data from every page URL.

    Parameters:
        url_lists: iterable of page URLs (see getURLLists).

    Returns:
        A flat list of non-empty data fields gathered from all pages.
    """
    all_fields = []
    for url in url_lists:
        all_fields.extend(getNBASingleData(url))
    # Drop empty fields in one O(n) pass instead of the original
    # quadratic remove-while-iterating idiom (which also needed a
    # dummy '' seed element that was then filtered back out).
    return [field for field in all_fields if len(field) > 0]
def getNBASingleData(url):
    """Fetch one result page and return its table fields.

    Parameters:
        url: full query URL for one page of results.

    Returns:
        List of strings: the text of the page's <tbody>, split on newlines.
        May contain empty strings; callers are expected to filter those.
    """
    html = urllib.urlopen(url).read()
    # Name the parser explicitly so bs4 does not guess one (and emit
    # a "no parser was explicitly specified" warning).
    soup = BeautifulSoup(html, 'html.parser')
    data = soup.html.body.find('tbody').text
    return data.split('\n')
def saveDataToExcel(datasets, sheetname, filename):
    """Write the flat field list to an .xls workbook, 24 columns per row.

    Parameters:
        datasets: flat list of cell values; every 24 consecutive items
            form one game row.
        sheetname: worksheet name.
        filename: output .xls path.
    """
    # Column captions in output order (original header strings kept byte-for-byte).
    headers = (
        u' The serial number ',
        u' The team ',
        u' time ',
        u' The results of ',
        u' host ',
        u' The game ',
        u' Field goal percentage ',
        u' Hit the number ',
        u' Number of shots ',
        u'3 shooting ',
        u'3 Points, hit a few ',
        u'3 Points to a few ',
        u' Free throw percentage ',
        u' Number of free throws made ',
        u' Free throw attempts ',
        u' rebounds ',
        u' Offensive rebounds ',
        u' Backcourt rebound ',
        u' assists ',
        u' steals ',
        u' blocks ',
        u' error ',
        u' A foul ',
        u' score ',
    )
    book = Workbook()
    sheet = book.add_sheet(sheetname)
    for col, caption in enumerate(headers):
        sheet.write(0, col, caption)
    num = len(headers)  # fields per game row
    data_len = len(datasets)
    print('data_len: %s' % data_len)
    data_cnt = 0
    row_cnt = 0
    while data_cnt < data_len:
        row_cnt += 1
        print(' The serial number : %s' % row_cnt)
        for col in range(num):
            # Guard a final partial row: the original indexed past the
            # end (IndexError) when data_len was not a multiple of 24.
            if data_cnt >= data_len:
                break
            sheet.write(row_cnt, col, datasets[data_cnt])
            data_cnt += 1
    book.save(filename)
def writeDataToTxt(datasets):
    """Dump the flat field list to nba_data.txt as tab-separated rows.

    Every 24 fields form one row. Short team names (and the 76ers, whose
    name defeats the width heuristic) receive an extra tab so columns
    line up when viewed with tab stops.
    """
    # NOTE(review): the original iterated range(len(datasets) - 1),
    # silently skipping the last field; preserved here — confirm whether
    # the final element is a sentinel that should be dropped.
    with open('nba_data.txt', 'w') as fp:
        line_cnt = 1
        for i in range(len(datasets) - 1):
            field = datasets[i]
            # Column 2 of each row holds the team name. The original
            # condition `A and B or C` parsed as `(A and B) or C`, which
            # double-tabbed a 76ers field in ANY column; the comment's
            # intent is `A and (B or C)`, applied here.
            if line_cnt % 24 == 2 and (len(field) < 5 or field == u' Philadelphia 76 people '):
                fp.write(field + '\t\t')
            else:
                fp.write(field + '\t')
            line_cnt += 1
            if line_cnt % 24 == 1:
                fp.write('\n')
if __name__ == "__main__":
    # 1132 result rows at 150 per page -> index of the last page to fetch.
    pages = 1132 // 150
    url_header = 'http://stat-nba.com/query_team.php?page='
    url_tail = '&QueryType=game&order=1&crtcol=date_out&GameType=season&PageNum=3000&Season0=2016&Season1=2017#label_show_result'
    # Crawl every page, then persist the combined data twice:
    # a tab-aligned text dump and a dated .xls workbook.
    page_urls = getURLLists(url_header, url_tail, pages)
    all_data = getNBAAllData(page_urls)
    writeDataToTxt(all_data)
    today = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    saveDataToExcel(all_data, 'nba normal data 2016-2017', 'nba_normal_data' + today + '.xls')
For more Python-related content, see this site's topic guides: "Python Socket Programming Skills Summary", "Python Regular Expression Usage Summary", "Python Data Structures and Algorithms Tutorial", "Python Function Usage Techniques", "Python String Manipulation Skills Summary", "Python Introductory and Advanced Tutorial", and "Python File and Directory Skills Summary".
I hope this article has been helpful for Python programming.