Details of how Python handles data in XML format
- 2020-05-27 06:01:18
- OfStack
This example demonstrates how Python handles data in XML format. I will share it with you for your reference as follows:
The operation here is based on the Python3 platform.
When dealing with XML using Python, the first problem you encounter is the encoding problem.
Python's built-in XML parser (Expat) does not support the gb2312 encoding, so parsing an XML file whose declaration says encoding="gb2312" raises an error. The encoding of the file itself can also cause an exception when Python reads it, in which case the encoding needs to be specified when the file is opened. A third source of trouble is Chinese text contained in the XML nodes themselves.
In my case, it's a little easier. I just need to modify the encoding header of XML.
#!/usr/bin/env python
import os, sys
import re
def replaceXmlEncoding(filepath, oldEncoding='gb2312', newEncoding='utf-8'):
    """Rewrite the encoding named in an XML file's ``<?xml ...?>`` declaration.

    Only the ``encoding="..."`` pseudo-attribute of the declaration at the
    top of the file is changed; occurrences of ``oldEncoding`` anywhere else
    in the document are left alone. The file is modified in place and is not
    rewritten at all when its declaration does not name ``oldEncoding``.

    Args:
        filepath: Path of the XML file to update in place.
        oldEncoding: Encoding name to look for. It is matched literally
            (not as a regular expression) and case-insensitively.
        newEncoding: Encoding name written in its place.

    Note:
        Only the declaration text changes. The file is read and written in
        text mode with the platform's default encoding, so the document's
        characters are not transcoded.
    """
    # Anchor on the declaration so body text that merely mentions the old
    # encoding name is never touched. An optional BOM and leading whitespace
    # are tolerated; ``[^>]`` keeps the match inside the declaration, and the
    # backreference requires the closing quote to match the opening one.
    declaration = re.compile(
        r'(\A\ufeff?\s*<\?xml\b[^>]*?\bencoding\s*=\s*)(["\'])'
        + re.escape(oldEncoding)
        + r'\2',
        re.IGNORECASE,
    )

    with open(filepath, mode='r') as f:
        content = f.read()

    # A function replacement avoids backslash/group interpretation of
    # ``newEncoding``.
    updated = declaration.sub(
        lambda m: m.group(1) + m.group(2) + newEncoding + m.group(2),
        content,
        count=1,
    )
    if updated == content:
        return

    with open(filepath, mode='w') as f:
        f.write(updated)
if __name__ == "__main__":
    # Script entry point: update the sample file's declared encoding using
    # the function's default old/new encoding names.
    replaceXmlEncoding(filepath='./ActivateAccount.xml')
The XML file is then manipulated using xml.etree.ElementTree.
Defining the `__call__` method on a class makes its instances callable like functions, as shown in the last few lines of the following code. This also highlights that in the Python world everything is an object, including `object` itself :)
I think the `__name__` check (`if __name__ == "__main__":`) is very good for testing.
#!/usr/bin/env python
import os, re
import xml.etree.ElementTree as etree
# Path of the text file searched for `<String id="...">text</String>` entries
# that supply the localized caption of a resource file.
Locale_Path = "./locale.txt"


class xmlExtractor(object):
    """Callable that summarizes one XML resource file as a dict.

    Instances hold no state; calling an instance with a file path returns
    the summary described in ``__call__``.
    """

    def __call__(self, filepath):
        """Extract summary information from the XML file at ``filepath``.

        Args:
            filepath: Path of the XML resource file to inspect. It is opened
                in text mode with the platform's default encoding.

        Returns:
            A dict with these keys:

            * ``'Line'``: number of lines in the file.
            * ``'Title'``: ``ID`` attribute of the first ``ResItem`` child of
              the document root (``None`` if it has no such attribute).
            * ``'ResItemCount'``: number of ``ResItem`` direct children of
              that element, plus one for the element itself.
            * ``'ChineseTip'``: text of the ``Locale_Path`` entry whose id
              equals the element's caption, or the string ``'None'`` when
              there is no caption or no matching entry.

        Raises:
            ValueError: If the document root has no ``ResItem`` child.
            xml.etree.ElementTree.ParseError: If the file is not well-formed.
            OSError: If ``filepath`` (or ``Locale_Path``, when a caption has
                to be looked up) cannot be opened.
        """
        # One handle serves both the line count and the parse, and the
        # ``with`` block closes it even when parsing fails.
        with open(filepath, 'r') as f:
            lineCount = sum(1 for _ in f)
            f.seek(0)
            tree = etree.parse(f)

        root = tree.find("ResItem")
        if root is None:
            raise ValueError('%s: no top-level ResItem element found' % filepath)

        retDict = {
            'Line': lineCount,
            'Title': root.get("ID"),
            'ResItemCount': len(root.findall("ResItem")) + 1,
            'ChineseTip': 'None',
        }

        localeText = None  # locale file contents, read lazily and only once
        for child in root:
            if child.get("Name") != "Caption":
                continue
            value = child.get("Value", "")
            # Values of at most one character carry no usable caption id.
            if len(value) <= 1:
                continue
            # A leading '~' is a marker, not part of the id.
            title = value[1:] if value[0] == '~' else value

            if localeText is None:
                with open(Locale_Path) as localeFile:
                    localeText = localeFile.read()

            # Escape the id so characters such as '.' or '+' match literally.
            pattern = '<String id="' + re.escape(title) + '">[^>]+>'
            m = re.search(pattern, localeText)
            if m is not None:
                # Strip the surrounding tags, keeping only the entry's text.
                # With several captions, the last one that matches wins.
                retDict['ChineseTip'] = re.sub('<[^>]+>', '', m.group(0))

        return retDict
if __name__ == "__main__":
    # Manual smoke test: summarize the sample file and print the result.
    print(xmlExtractor()('./ActivateAccount.xml'))
Finally, there is the entry file. It imports the two files above and uses xml.dom and os.listdir to recursively process the XML files and generate a result set.
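I find Python's UnboundLocalError very interesting. It is raised when a function assigns to a name (for example with `+=`) that was only defined at module level, which is why the counters below are declared `global` inside the function. I wonder if it is a symbol table overwrite problem.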
#!/usr/bin/env python
from xmlExtractor import *
from replaceXmlEncoding import *
from xml.dom import minidom,Node
# Result document; one <XmlFile> element is added per processed file.
doc = minidom.Document()
# Callable that extracts the summary dict from a single XML file.
extractor = xmlExtractor()

# Module-level counters shared with myWalkDir through ``global``.
totalLines = totalResItemCnt = totalXmlFileCnt = totalErrorCnt = 0
# Paths of the files whose processing raised an exception.
errorFileList = []

# appendChild() returns the node it attached, so the root element can be
# created and bound in one step.
xmlRoot = doc.appendChild(doc.createElement("XmlResourceFile"))
def myWalkDir(level, path):
    """Recursively scan ``path`` and record every XML file in the result document.

    For each directory entry whose name ends in ``.xml`` the declared
    encoding is rewritten with ``replaceXmlEncoding``, the summary is
    extracted with ``extractor``, and an ``XmlFile`` element holding that
    summary is appended to ``xmlRoot``. Sub-directories are visited
    recursively.

    Args:
        level: Recursion depth of ``path`` (the caller passes 0 for the
            starting directory). It is passed down but not otherwise used.
        path: Directory to scan.

    Side effects:
        Increments the module-level ``totalXmlFileCnt`` for every XML file
        seen and ``totalErrorCnt`` for every file that failed; failed paths
        are appended to ``errorFileList`` and reported on stdout.
    """
    global totalXmlFileCnt, totalErrorCnt

    for name in os.listdir(path):
        # os.path.join keeps the walk working on platforms whose separator
        # is not a backslash.
        fullPath = os.path.join(path, name)

        if name.endswith('.xml'):
            totalXmlFileCnt += 1
            try:
                # First switch the declared encoding from gb2312 to utf-8,
                # then extract the information required from the document.
                replaceXmlEncoding(fullPath)
                info = extractor(fullPath)

                # NOTE(review): 34 is a hard-coded prefix length, presumably
                # the length of the original author's base directory --
                # confirm the intended base before relying on this value.
                fields = (
                    ("Filename", name),
                    ("Filepath", path[34:]),
                    ("Title", str(info['Title'])),
                    ("ChineseTip", str(info['ChineseTip'])),
                    ("ResItemCount", str(info['ResItemCount'])),
                    ("LineCount", str(info['Line'])),
                    ("Description", ''),
                )
                xmlNode = doc.createElement("XmlFile")
                for tag, value in fields:
                    fieldNode = doc.createElement(tag)
                    fieldNode.setAttribute('Value', value)
                    xmlNode.appendChild(fieldNode)
                # Attach the node only once it is complete, so a failure
                # never leaves a partial entry in the document.
                xmlRoot.appendChild(xmlNode)
            except Exception as errorDetail:
                # Best effort: one bad file must not stop the whole walk.
                totalErrorCnt += 1
                errorFileList.append(fullPath)
                print(fullPath, errorDetail)

        if os.path.isdir(fullPath):
            myWalkDir(level + 1, fullPath)
if __name__ == "__main__":
    # Process the "themes" directory under the current working directory.
    path = os.path.join(os.getcwd(), 'themes')
    myWalkDir(0, path)
    print(totalXmlFileCnt, totalErrorCnt)
    # toprettyxml() without an encoding argument returns text whose XML
    # declaration names no encoding, so readers will assume UTF-8; write the
    # file as UTF-8 explicitly instead of using the platform default.
    with open("./xmlResourceList.xml", "w", encoding="utf-8") as resultXml:
        resultXml.write(doc.toprettyxml(indent = " "))
PS: here are some other online tools about xml operation for your reference:
Online XML/JSON interconversion tool:
http://tools.ofstack.com/code/xmljson
Online formatting XML/ online compression XML:
http://tools.ofstack.com/code/xmlformat
XML online compression/formatting tools:
http://tools.ofstack.com/code/xml_format_compress
XML code online formatting beautification tool:
http://tools.ofstack.com/code/xmlcodeformat
For more on Python-related topics, interested readers can view this site's collections: "Python xml data operation skill summary", "Python data structure and algorithm tutorial", "Python Socket programming skills summary", "Python function using techniques", "Python string skills summary", "Python introduction and advanced tutorial" and "Python file and directory skills summary".
I hope this article is helpful for you to design Python program.