Splitting Large Text Files with Python
- 2021-07-24 11:28:16
- OfStack
This article shares Python code for splitting a large text file into several smaller files, for your reference. The details are as follows.
Development environment
Python 2
Result
A large text file can be split into a user-chosen number of parts, either by dragging and dropping the files onto the script or by typing in their paths.
Code implementation
#coding:gbk
"""Split large text files into a chosen number of smaller files.

Usage: pass the files to split as command-line arguments (for example by
dragging them onto the script), or run the script without arguments and type
the paths when prompted.  The parts of ``<dir>/<name><ext>`` are written to
``<dir>/<name>/<name>_<n><ext>``.
"""
import itertools
import os
import shutil
import sys

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3


def _existing_files(candidates):
    """Return the stripped, non-blank paths in *candidates*.

    Returns None (after printing a message) as soon as one of the paths is
    not an existing regular file, so that the caller can ask again.
    """
    files_list = []
    for candidate in candidates:
        path = candidate.strip()
        if not path:
            continue
        # isfile rather than exists: a directory cannot be split.
        if not os.path.isfile(path):
            print(path+' File path does not exist , Please re-enter !')
            return None
        files_list.append(path)
    return files_list


def _count_lines(file_path):
    """Count the lines of *file_path* without loading it into memory."""
    with open(file_path, 'rb') as source:
        return sum(1 for _ in source)


def split_text_file(file_path, files_count):
    """Split *file_path* into *files_count* files of near-equal line counts.

    When the line total is not divisible by *files_count*, the first
    ``total % files_count`` parts receive one extra line each.  The parts are
    written to a directory named after the file without its extension (or
    ``file_path + '_split'`` when the file has no extension, because the
    directory name would otherwise collide with the file itself).

    WARNING: an existing output directory is deleted with all its contents.

    The file is handled in binary mode, so the content (including the
    original line endings) is copied byte-for-byte and only ``\\n`` ends a
    line.

    Returns the list of generated file paths, in order.
    Raises ValueError when *files_count* is smaller than 1 or larger than the
    number of lines in the file.
    """
    print(' Counting text lines .....')
    total_lines_count = _count_lines(file_path)
    print(' Total number of lines of text :'+str(total_lines_count))
    if files_count < 1 or files_count > total_lines_count:
        raise ValueError('cannot split %d line(s) into %d file(s)'
                         % (total_lines_count, files_count))

    filename = os.path.basename(file_path)
    output_dir, extension = os.path.splitext(file_path)
    stem = os.path.splitext(filename)[0]
    if not extension:
        # Without an extension the directory path would equal the file path.
        output_dir = file_path+'_split'
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.mkdir(output_dir)

    lines_count = total_lines_count // files_count
    mod_count = total_lines_count % files_count
    print(' File splitting in progress .....')
    generated = []
    with open(file_path, 'rb') as source:
        for file_num in range(files_count):
            quota = lines_count+1 if file_num < mod_count else lines_count
            split_file_path = os.path.join(
                output_dir, stem+'_'+str(file_num)+extension)
            print(' Generating :'+split_file_path)
            # Each part is opened once and filled with the next `quota`
            # lines of the shared source iterator.
            with open(split_file_path, 'wb') as split_file:
                split_file.writelines(itertools.islice(source, quota))
            generated.append(split_file_path)
    print(file_path+' Segmentation completion !')
    return generated


def main():
    """Collect the files and the part count, then split every file."""
    candidates = sys.argv[1:]
    files_list = None
    while not files_list:
        if not candidates:
            print(' Please enter the full path of the file to be cut :')
            candidates = _read_line().strip().split(' ')
        files_list = _existing_files(candidates)
        # Command-line arguments never change, so any retry has to come
        # from the keyboard; otherwise a bad argument would loop forever.
        candidates = []
    print(' File to be cut :'+str(files_list))

    files_count = 0
    while files_count < 1:
        print(' Please enter the number of files to cut :')
        answer = _read_line().strip()
        if answer.isdigit() and int(answer) > 0:
            files_count = int(answer)
        else:
            print(' Please enter the correct number !')

    for file_path in files_list:
        try:
            split_text_file(file_path, files_count)
        except ValueError:
            print(' Text too small , Not worth dividing !')
            sys.exit()
    if os.name == 'nt':
        # Keep the console window open when started by drag and drop.
        os.system('pause')


if __name__ == '__main__':
    main()
Source address