Python: How to Remove Duplicate Fields from a File
- 2021-07-22 10:01:45
- OfStack
This article shares example Python code for removing duplicate fields from a file, for your reference. The details are as follows:
The lines of the original file are loaded into a list, and the new file is then scanned row by row. Any row whose key field (the second CSV column) does not appear in the list is written to a third file.
import csv
# Input/output locations used by the de-duplication script below.
# Plain-text log whose lines are the already-known values.
filetxt1 = "E:/gg/log/log1.txt"
# CSV counterpart of the first log (not referenced by the code shown here).
filecsv1 = "E:/gg/log/log1.csv"
# CSV log that is scanned for rows not yet present in the old log.
filecsv2 = "E:/gg/log/log2.csv"
# CSV file that receives the rows found only in the new log.
filecsv3 = "E:/gg/log/log3.csv"
class operFileCsv():
    """Small helper for printing and appending rows of one CSV file.

    Args:
        filename: Path of the CSV file this instance operates on.
    """

    def __init__(self, filename=None):
        self.filename = filename

    def readCsvFile(self):
        """Print every row of the CSV file, one parsed row (a list) per line."""
        # newline='' hands line-ending handling to the csv module, so line
        # breaks embedded in quoted fields come back unchanged. The `with`
        # block guarantees the handle is closed (the previous code referenced
        # `close` without calling it, so the file was never closed).
        with open(self.filename, 'r', newline='') as readCsvHandler:
            for fileline in csv.reader(readCsvHandler, dialect='excel'):
                print(fileline)

    def writeCsvFile(self, writeline):
        """Append a single row to the CSV file, creating the file if needed.

        Args:
            writeline: Iterable of field values that make up the row.
        """
        # `with` closes the handle even if writerow raises.
        with open(self.filename, 'a', newline='') as writeCsvHandler:
            csv.writer(writeCsvHandler, dialect='excel').writerow(writeline)
class getLogBuffFromFile():
    """Collects the non-blank lines of a text log into an in-memory list."""

    def __init__(self):
        # Stripped log lines; grows across successive getLog1Buff calls.
        self.logBuff1 = []

    def getLog1Buff(self, filename):
        """Append every non-blank line of `filename` to the buffer.

        Each line is stripped of surrounding whitespace. Blank or
        whitespace-only lines are skipped; they no longer end the read, so
        content after an empty line is not lost.

        Args:
            filename: Path of the text file to read.

        Returns:
            The instance's accumulated list of stripped lines.
        """
        with open(filename) as filehandler:
            for rawLine in filehandler:
                logOneLine = rawLine.strip()
                if logOneLine:
                    self.logBuff1.append(logOneLine)
        return self.logBuff1

    def getLog2Buff(self, logOneLine):
        """Not implemented: currently does nothing and returns None."""
        pass
class deleteIterantLog():
    """Copies rows of a new CSV log whose key is absent from an old log.

    The key of a CSV row is its second column; the old log contributes one
    key per stripped line.
    """

    def __init__(self):
        # Keys read from the old log; filled in by deleteProcedure.
        self.logBuff1List = None
        self.logBuff2OneLine = None

    def deleteProcedure(self, oldlog, newlog, createlog):
        """Load the old log, then write the unseen rows of `newlog`.

        Args:
            oldlog: Path of the text file holding the already-known keys.
            newlog: Path of the CSV file to filter.
            createlog: Path of the CSV file that receives the unseen rows.
        """
        self.logBuff1List = getLogBuffFromFile().getLog1Buff(oldlog)
        self.dealProcedure(newlog, createlog)

    def dealProcedure(self, file1name, file2name):
        """Append to `file2name` each row of `file1name` with an unknown key.

        Rows with fewer than two fields (for example blank lines) have no key
        column and are skipped instead of raising IndexError. Nothing is
        written, and the output file is not created, when no row qualifies.

        Args:
            file1name: Path of the CSV file to scan.
            file2name: Path of the CSV file to append the unseen rows to.
        """
        # A set gives O(1) membership tests; an unset (None) buffer is
        # treated as "no known keys".
        knownKeys = set(self.logBuff1List or ())
        # newline='' lets the csv module handle line endings itself.
        with open(file1name, 'r', newline='') as readCsvHandler:
            freshRows = [
                fileline
                for fileline in csv.reader(readCsvHandler, dialect='excel')
                if len(fileline) > 1 and fileline[1] not in knownKeys
            ]
        if not freshRows:
            return
        # Open the output once instead of once per row.
        with open(file2name, 'a', newline='') as writeCsvHandler:
            csv.writer(writeCsvHandler, dialect='excel').writerows(freshRows)
if __name__ == '__main__':
    # Script entry point: filter the new CSV log against the old text log
    # and write the unseen rows to the third file.
    logFilter = deleteIterantLog()
    logFilter.deleteProcedure(
        oldlog=filetxt1,
        newlog=filecsv2,
        createlog=filecsv3,
    )
Here is another Python snippet, which uses de-duplication to remove repeated characters from a text file:
import os,sys,datetime
import codecs
# Read the whole input file as one string.
with open('aaaaa.txt', 'r') as f:
    text = f.read()

# Keep one copy of every character (line breaks included). dict.fromkeys
# preserves first-occurrence order, so the output is the same on every run,
# and an empty input file simply yields an empty string instead of raising
# IndexError.
s = "".join(dict.fromkeys(text))
print(s)

# Write the de-duplicated characters to result.txt as UTF-8 bytes.
with open('result.txt', 'wb') as f1:
    f1.write(s.encode("utf-8"))
For more articles on installing Python, please refer to "Python Installation Tutorials for Various Python Versions".
For more recommended books, please see the "Essential Python Programming Books" list.
Practical resources: Python video tutorial for complete beginners