Splitting Large Text Files with Python

  • 2021-07-24 11:28:16
  • OfStack

This article shares a Python script that splits a large text file into several smaller files, for your reference. The details are as follows.

Development environment

Python 2

Realization effect

The files to split can be supplied either by dragging and dropping them onto the script or by typing their paths when prompted; the script then asks how many pieces each file should be cut into.

Code implementation


 #coding:gbk
 import os,sys,shutil
 
 # Collect the files to split into files_list.
 #
 # Paths given on the command line (e.g. files dropped onto the script) are
 # tried first. If there are none, or any of them does not exist, the user is
 # prompted until every entered path exists. Falling back to the prompt matters:
 # command-line arguments never change, so re-checking them in a loop would
 # print the same error forever.
 #
 # NOTE: typed input is split on single spaces, so a typed path that itself
 # contains spaces cannot be entered interactively.
 pending_paths=sys.argv[1:]
 is_file_exits=False
 while not is_file_exits:
     if not pending_paths:
         print(' Please enter the full path of the file to be cut :')
         pending_paths=raw_input().strip().split(' ')
 
     files_list=[]
     is_file_exits=True
     for str_file_path in pending_paths:
         str_file_path=str_file_path.strip()
         if str_file_path=='':
             continue
         if not os.path.exists(str_file_path):
             print(str_file_path+' File path does not exist , Please re-enter !')
             is_file_exits=False
             break
         files_list.append(str_file_path)
 
     # Nothing but blanks was supplied: treat it like a failed attempt.
     if not files_list:
         is_file_exits=False
 
     # Whatever happens next, the following attempt must come from the prompt.
     pending_paths=[]
 
 print(' File to be cut :'+str(files_list))
 
 # Ask how many pieces each file should be cut into, repeating the question
 # until a positive whole number is entered. The answer is kept as a string in
 # str_files_count and converted with int() where it is used.
 # Zero must be rejected here: the value is later used as a divisor when the
 # number of lines per piece is computed.
 is_continue=False
 while not is_continue:
     print(' Please enter the number of files to cut :')
     str_files_count=raw_input().strip()
     if str_files_count.isdigit() and int(str_files_count)>0:
         is_continue=True
     else:
         print(' Please enter the correct number !')
 
 # Split every file in files_list into int(str_files_count) pieces.
 #
 # For an input "<dir>/<name><ext>" the pieces are written to a freshly created
 # directory "<dir>/<name>/" as "<name>_0<ext>", "<name>_1<ext>", ... Lines are
 # distributed in order; when the line count is not evenly divisible, the first
 # (total % count) pieces each receive one extra line.
 for file_path in files_list:
     files_count=int(str_files_count)
 
     print(' Counting text lines .....')
 
     # Stream the file rather than readlines(): the input is expected to be
     # large, and the with-block guarantees the handle is closed.
     with open(file_path,'rU') as source_file:
         total_lines_count=sum(1 for _ in source_file)
     print(' Total number of lines of text :'+str(total_lines_count))
 
     if files_count>total_lines_count:
         print(' Text too small , Not worth dividing !')
         # Skip only this file; the remaining files are still processed and
         # the final pause is still reached so the message can be read.
         continue
 
     filename=os.path.basename(file_path)
     (basename,extension)=os.path.splitext(filename)
     (output_dir,_)=os.path.splitext(file_path)
     if extension=='':
         # Without an extension the directory name would be the input file's
         # own path, so pick a distinct directory name instead.
         output_dir=file_path+'_split'
 
     # Start from an empty output directory, discarding any previous run.
     if os.path.isdir(output_dir):
         shutil.rmtree(output_dir)
     os.mkdir(output_dir)
 
     (lines_count,mod_count)=divmod(total_lines_count,files_count)
 
     print(' File splitting in progress .....')
 
     # One sequential pass over the source; each piece is opened exactly once
     # instead of being reopened in append mode for every line.
     with open(file_path,'rU') as source_file:
         for file_num in range(files_count):
             # The first mod_count pieces absorb the remainder, one line each.
             if file_num<mod_count:
                 remaining=lines_count+1
             else:
                 remaining=lines_count
 
             # Build the name from the split stem/extension rather than with
             # str.replace, which misbehaves for empty or repeated extensions.
             split_file_path=os.path.join(output_dir,basename+'_'+str(file_num)+extension)
             print(' Generating :'+split_file_path)
 
             with open(split_file_path,'w') as split_file:
                 while remaining>0:
                     split_file.write(source_file.readline())
                     remaining-=1
 
     print(file_path+' Segmentation completion !')
 
 # Keep the console window open after a drag-and-drop launch. "pause" is a
 # Windows shell built-in, so it is only invoked there.
 if os.name=='nt':
     os.system('pause')

Source address


Related articles: