Converting a txt file to ARFF format with Python
- 2020-10-23 20:09:47
- OfStack
This article shares Python code that converts a txt file to the ARFF format, for your reference. The details are as follows.
By default, every value read from the file is treated as a string, so the attribute types in the converted output are not quite right, but the result can still be used.
The input file must have the attribute names on its first line, followed by the data rows.
import sys
import re
# Command-line arguments: argv[1] is the relation name, argv[2] the input file.
# Both stay empty when they are not supplied; Arff substitutes its own
# defaults for empty strings.
relationname = ""
filename = ""
# Both positional arguments are read below, so at least three argv entries
# (script name + 2 arguments) are required; checking "< 2" let a single
# argument through and crashed with IndexError on sys.argv[2].
if len(sys.argv) < 3:
    print("Usage:\npython arff.py MyRelationName filename.txt")
else:
    relationname = sys.argv[1]
    filename = sys.argv[2]
class Arff:
    """Convert a whitespace-separated text table into an ARFF file.

    The first non-blank line of the input holds the attribute names; every
    following non-blank line is one data row. Constructing an instance reads
    the input file and immediately writes the ARFF output.
    """

    # Output path used when none is given (kept from the original script).
    outname = "Dexhunter_test_result.arff"

    # "yyyy-MM-dd HH:mm:ss" timestamps (only the start of the value is checked).
    _DATE_RE = re.compile(
        r"[0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}"
    )
    # Plain decimal numbers with optional sign, fraction and exponent.
    _NUMERIC_RE = re.compile(
        r"[-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?"
    )

    def __init__(self, r, f, out="Dexhunter_test_result.arff"):
        """Parse *f* and write the ARFF result.

        Args:
            r: Relation name; an empty value falls back to "MachineLearning".
            f: Input text file path; an empty value falls back to
                "MMG_data.txt".
            out: Path of the ARFF file to write.
        """
        # Truthiness instead of `is not ""`: identity comparison with a
        # literal is unreliable and also lets None through.
        self.relationname = r if r else "MachineLearning"
        f = f if f else "MMG_data.txt"
        self.outname = out
        self.data = []
        self.names = []
        # The handle is only needed while parsing; close it deterministically.
        # self.file1 remains available afterwards as a closed file object.
        with open(f, 'r') as self.file1:
            self.parseData()
        self.writeToFile()

    def parseData(self):
        """Read ``self.file1`` into ``self.names`` and ``self.data``.

        Lines are split on any run of whitespace, so repeated spaces, tabs
        and a trailing carriage return do not create empty fields. Blank
        lines are skipped.
        """
        headerSeen = False
        for line in self.file1:
            words = line.split()
            if not words:
                continue
            if headerSeen:
                self.data.append(words)
            else:
                self.names = words
                headerSeen = True

    def getType(self, value):
        """Return the ARFF type declaration for a single value.

        Returns "numeric" for ints, floats and numeric-looking strings,
        "date yyyy-MM-dd HH:mm:ss" for timestamp strings, "string" for any
        other string, and "" (after printing a notice) for unsupported types.
        """
        if isinstance(value, (int, float)):
            return "numeric"
        if isinstance(value, str):
            if self._DATE_RE.match(value):
                return "date " + "yyyy-MM-dd HH:mm:ss"
            # Values parsed from the file are always strings, so numbers have
            # to be recognised from their text.
            if self._NUMERIC_RE.fullmatch(value):
                return "numeric"
            return "string"
        print("Data type " + str(value) + " not supported yet.")
        return ""

    def _columnType(self, index):
        """Infer the ARFF type of column *index* from all data rows."""
        kinds = {
            self.getType(row[index])
            for row in self.data
            # Short rows have no value here; "?" is ARFF's missing marker.
            if index < len(row) and row[index] != "?"
        }
        if len(kinds) == 1:
            kind = kinds.pop()
            if kind:
                return kind
        # No data, mixed types or an unsupported type: "string" is the one
        # declaration that accepts every value.
        return "string"

    def writeToFile(self, outname=None):
        """Write the relation header, attribute list and data rows as ARFF.

        Args:
            outname: Output path; defaults to ``self.outname``.
        """
        target = outname if outname is not None else self.outname
        with open(target, 'w') as file2:
            # Build the header line locally; self.relationname is not changed.
            file2.write('@RELATION ' + self.relationname + "\n")
            for i, name in enumerate(self.names):
                file2.write(
                    "@ATTRIBUTE " + name + " " + self._columnType(i) + "\n"
                )
            file2.write("@DATA\n")
            for row in self.data:
                try:
                    file2.write(",".join(row) + "\n")
                except UnicodeEncodeError:
                    # Best effort: report the row and keep writing the rest.
                    print("cant write Data to file!!")
# Run the conversion now: constructing Arff parses the input file and
# writes the ARFF output as part of __init__.
Arff(relationname, filename)