python traverses all excel files under the folder
- 2020-06-23 00:48:59
- OfStack
1 heap table is often used in big data processing, and then the data needs to be imported into 1 list for various algorithm analysis. Let me briefly introduce my own practice:
1. How do I read excel files
There are many versions online, based on the xlrd module, find 1 source code:
import xdrlib ,sys
import xlrd
def open_excel(file="C:/Users/flyminer/Desktop/ new Microsoft Excel The worksheet .xlsx"):
data = xlrd.open_workbook(file)
return data
# Fetch by index Excel Data in a table parameter :file : Excel The file path colnameindex : The row on which the header column name is located . by_index : The index of the table
def excel_table_byindex(file="C:/Users/flyminer/Desktop/ new Microsoft Excel The worksheet .xlsx",colnameindex=0,by_index=0):
data = open_excel(file)
table = data.sheets()[by_index]
nrows = table.nrows # The number of rows
ncols = table.ncols # The number of columns
colnames = table.row_values(colnameindex) # some 1 Rows of data
list =[]
for rownum in range(1,nrows):
row = table.row_values(rownum)
if row:
app = {}
for i in range(len(colnames)):
app[colnames[i]] = row[i]
list.append(app)
return list
# Get by name Excel Data in a table parameter :file : Excel The file path colnameindex : The row on which the header column name is located . by_name : Sheet1 The name of the
def excel_table_byname(file="C:/Users/flyminer/Desktop/ new Microsoft Excel The worksheet .xlsx",colnameindex=0,by_name=u'Sheet1'):
data = open_excel(file)
table = data.sheet_by_name(by_name)
nrows = table.nrows # The number of rows
colnames = table.row_values(colnameindex) # some 1 Rows of data
list =[]
for rownum in range(1,nrows):
row = table.row_values(rownum)
if row:
app = {}
for i in range(len(colnames)):
app[colnames[i]] = row[i]
list.append(app)
return list
def main():
tables = excel_table_byindex()
for row in tables:
print(row)
tables = excel_table_byname()
for row in tables:
print(row)
if __name__=="__main__":
main()
The last sentence is the point, so give the code person a "like" here too!
The last sentence makes the functions in the code reusable. Simply put: assuming the file name is a, import a in the program can use a.excel_table_byname () and a.excel_table_byindex ().
2. Then traverse the folder to get the excel file and the path:, the original code is as follows:
import os
import xlrd
import test_wy
xpath="E:/ Tang Weijie / Electric power / General folder for power system / Zhoushan power "
xtype="xlsx"
typedata = []
name = []
raw_data=[]
file_path=[]
def collect_xls(list_collect,type1):
# Gets all of the items in the list type file
for each_element in list_collect:
if isinstance(each_element,list):
collect_xls(each_element,type1)
elif each_element.endswith(type1):
typedata.insert(0,each_element)
return typedata
# Read all folders xls file
def read_xls(path,type2):
# Iterate over the path folder
for file in os.walk(path):
for each_list in file[2]:
file_path=file[0]+"/"+each_list
#os.walk() The function returns 3 Parameter: path, subfolder, file under path, using string concatenation file[0] and file[2] Gets the path to the file
name.insert(0,file_path)
all_xls = collect_xls(name, type2)
# Iterate over all type File path and read the data
for evey_name in all_xls:
xls_data = xlrd.open_workbook(evey_name)
for each_sheet in xls_data.sheets():
sheet_data=test_wy.excel_table_byname(evey_name,0,each_sheet.name)
# Please refer to read excel File code
raw_data.insert(0, sheet_data)
print(each_sheet.name,":Data has been done.")
return raw_data
a=read_xls(xpath,xtype)
print("Victory")
Different ideas are welcome.