Realization method of Python reading xlsx file
- 2021-07-09 08:47:04
- OfStack
The script is as follows:
from openpyxl import load_workbook
workbook = load_workbook(u'/tmp/test.xlsx') # Find the need xlsx Location of the file
booksheet = workbook.active # Gets the currently active sheet, The default is the first 1 A sheet
# If you want to get something else sheet Page takes the following approach, first getting all sheet Page name, after specifying the 1 Page.
# sheets = workbook.get_sheet_names() # Get from the name sheet
# booksheet = workbook.get_sheet_by_name(sheets[0])
# Get sheet Row data for a page
rows = booksheet.rows
# Get sheet Column data of the page
columns = booksheet.columns
i = 0
# Iterate all rows
for row in rows:
i = i + 1
line = [col.value for col in row]
cell_data_1 = booksheet.cell(row=i, column=3).value # Get the i Row 1 Column data
cell_data_2 = booksheet.cell(row=i, column=4).value # Get the i Row 2 Column data
cell_data_3 = booksheet.cell(row=i, column=8).value # Get the i Row 3 Column data
cell_data_4 = booksheet.cell(row=i, column=18).value # Get the i Row 4 Column data
print (cell_data_1, cell_data_2, cell_data_3, cell_data_4)
Example: python reads excel data for classification statistics
A certain excel records a person's call record, the following program will count the call time according to the call place, call type, etc.
# -*- coding:utf-8 -*-
import xlrd
import datetime
infos=[]
info_file=xlrd.open_workbook('src.xls')# Open excel Documents
info_sheet=info_file.sheets()[0]# Get worksheets by index order
row_count=info_sheet.nrows# Get the number of rows and columns ncols
for row in range(1,row_count):
time_string=info_sheet.cell(row,3).value
time_s_sp=time_string.split(':')
infos.append(# The array contains row_count A dictionary
{
'type':info_sheet.cell(row,2).value,# Gets cell, call type, caller and called
'other_cellphone':info_sheet.cell(row,0).value,# Opposite number ,
'timespan':datetime.timedelta(seconds=int(time_s_sp[2]),minutes=int(time_s_sp[1]),hours=int(time_s_sp[0])),
'gpscity':info_sheet.cell(row,5).value# Is the call local or foreign
}
)
time_all=datetime.timedelta(seconds=0)# Initialization
time_types={}
time_classes={}
time_numbers={}
for infor in infos:# Take out the dictionary in the array
time_all +=infor['timespan']# Find the total number of calls
infor_type=infor['type']
if infor_type in time_types:
time_types[infor_type]+=infor['timespan']
else:
time_types[infor_type]=infor['timespan']# Talk time by call type
infor_class=infor['gpscity']
if infor_class in time_classes:
time_classes[infor_class]+=infor['timespan']
else:
time_classes[infor_class]=infor['timespan']# This is equivalent to classifying and assigning values first and then ++ According to the attribution, the call time is counted
infor_number=infor['other_cellphone']
if infor_number in time_numbers:
time_numbers[infor_number]+=infor['timespan']
else:
time_numbers[infor_number]=infor['timespan']# Count the call time according to the number
print ' Total talk time: %s' % time_all
print
print ' Classification of total call mode '
for k,v in time_types.items():
print k.encode('utf-8'),v
print
print ' Call type classification :'
for k,v in time_classes.items():
print k.encode('utf-8'),v
print
print ' Counter number classification: '
for k,v in time_numbers.items():
print k,v
Optimize the code again
# -*- coding:utf-8 -*-
import xlrd
from datetime import timedelta
def read_excel(file_excel):# Read excel And put the required data classification in the array
infos=[]
info_file=xlrd.open_workbook(file_excel)
info_sheet=info_file.sheets()[0]
row_count=info_sheet.nrows
for row in range(1,row_count):
time_string=info_sheet.cell(row,3).value
time_s_sp=time_string.split(':')
infos.append(
{
'type':info_sheet.cell(row,2).value,
'other_cellphone':info_sheet.cell(row,0).value,
'timespan':timedelta(seconds=int(time_s_sp[2]),minutes=int(time_s_sp[1]),hours=int(time_s_sp[0])),
'gpscity':info_sheet.cell(row,5).value
}
)
return infos
def count_cell(list_dirs,infotype):# Statistical total calls and classified statistical results are stored in the dictionary
result_dir={}
time_all=timedelta(seconds=0)
for list_dir in list_dirs:
time_all +=list_dir['timespan']
info_type = list_dir[infotype]
if info_type not in result_dir:
result_dir[info_type]=list_dir['timespan']
else:
result_dir[info_type]+=list_dir['timespan']
return time_all,result_dir
def print_result(result_dir):# Print data
for k,v in result_dir.items():
print k.encode('utf-8'),v
if __name__=="__main__":
list_dirs=read_excel('src.xls')
time_all,result_type=count_cell(list_dirs,'type')
result_cell=count_cell(list_dirs,'other_cellphone')
result_gpscity = count_cell(list_dirs, 'gpscity')
print ' Total talk time: %s' % time_all
print ' Classified by call type: '
print_result(result_type)
print ' Classified by number: '
print_result(result_cell[1])
print ' Classification according to attribution: '
print_result(result_gpscity[1])