python How to Remove Duplicate Fields from a File

  • 2021-07-22 10:01:45
  • OfStack

This article example for everyone to share python how to delete the specific code of duplicate fields in the file, for your reference, the specific content is as follows

The contents of the original file are placed in list, and the contents of the new file are searched by line. If they do not appear in list, they are written to the third file.


import csv

filetxt1 = 'E:/gg/log/log1.txt'
filecsv1 = 'E:/gg/log/log1.csv'
filecsv2 = 'E:/gg/log/log2.csv'
filecsv3 = 'E:/gg/log/log3.csv'


class operFileCsv():
 def __init__(self, filename=None):
  self.filename = filename

 def readCsvFile(self):
  readCsvHandler = open(self.filename, 'r')
  filelines = csv.reader(readCsvHandler, dialect='excel')
  for fileline in filelines:
   print(fileline)
  readCsvHandler.close

 def writeCsvFile(self, writeline):
  writeCsvHandler = open(self.filename, 'a', newline='')
  csvWrite = csv.writer(writeCsvHandler, dialect='excel', )
  csvWrite.writerow(writeline)
  writeCsvHandler.close()


class getLogBuffFromFile():
 def __init__(self):
  self.logBuff1 = []

 def getLog1Buff(self, filename):
  with open(filename) as filehandler:
   while True:
    logOneLine = filehandler.readline().strip()
    if not logOneLine:
     break
    self.logBuff1.append(logOneLine)
  # print('TRACE: The log1 has ', len(self.logBuff1), ' lines.')
  return self.logBuff1

 def getLog2Buff(self, logOneLine):
  pass


class deleteIterantLog():
 def __init__(self):
  self.logBuff1List = None
  self.logBuff2OneLine = None

 def deleteProcedure(self, oldlog, newlog, createlog):
  self.logBuff1List = getLogBuffFromFile().getLog1Buff(oldlog)
  self.dealProcedure(newlog, createlog)

 def dealProcedure(self, file1name, file2name):
  with open(file1name, 'r') as readCsvHandler:
   filelines = csv.reader(readCsvHandler, dialect='excel')
   for fileline in filelines:
    if fileline[1] not in self.logBuff1List:
     operFileCsv(file2name).writeCsvFile(fileline)


if __name__ == '__main__':
 deleteIterantLog().deleteProcedure(filetxt1, filecsv2, filecsv3)

This site will share with you a paragraph of Python using sets to remove duplicate words in the text:


import os,sys,datetime
import codecs
with open('aaaaa.txt', 'r') as f:  # Read a file into text 
 l = f.readlines() # txt Read all strings in the data
 x=set(l[0])
 for i in range(1,len(l)):
  x.update(l[i])
 s="".join(list(x))
 print(s)
with open('result.txt','wb') as f1: # Write the results to a file result Medium 
 b=bytes(s,encoding="utf-8") 
 f1.write(b)

For more articles on python installation tutorials, please refer to "python Installation Tutorials for Various python Versions"

For more wonderful books, please click python Programming Essential Books List

Getting Dry Goods: Getting Started with Zero Basic Learning python Video Tutorial


Related articles: