Generating an XML file with Python and pretty-printing it (example code)
- 2021-11-01 04:10:09
- OfStack
Let's look at the code:
# -*- coding:utf-8 -*-
import os
import json
import numpy as np
#from xml.etree import ElementTree as etree
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import SubElement
from xml.etree.ElementTree import ElementTree
# Input and output directories used when this file is run as a script.
imagePath = r'E:\Desktop\SteelCoilsDetection\test\images'
jsonPath = r'E:\Desktop\SteelCoilsDetection\test\json'
savePath = r'E:\Desktop\SteelCoilsDetection\test\xml'

# Shapes carrying this label are left out of the generated XML.
IGNORED_LABEL = 'a'


def add_text_child(parent, tag, text):
    """Append a ``<tag>`` child containing ``text`` to ``parent`` and return it."""
    child = SubElement(parent, tag)
    child.text = text
    return child


def build_annotation(jsonDic, stem, image_dir):
    """Build the ``<annotation>`` element for one parsed JSON document.

    Args:
        jsonDic: Parsed JSON dictionary. The keys read here are
            ``'imageWidth'``, ``'imageHeight'`` and ``'shapes'``; every shape
            must provide ``'label'`` and ``'points'`` (a list of ``[x, y]``
            pairs).
        stem: File name of the JSON document without its extension; written
            to ``<filename>`` and used to build ``<path>``.
        image_dir: Directory that holds the images; joined with ``stem`` to
            form ``<path>``.

    Returns:
        The root ``annotation`` Element, with one ``<object>`` per kept shape.
        Each object's ``<bndbox>`` is the axis-aligned bounding box of the
        shape's points.
    """
    annotation = Element('annotation')
    add_text_child(annotation, 'folder', "images")
    add_text_child(annotation, 'filename', stem)
    # os.path.join inserts the separator that plain string concatenation
    # (image_dir + stem) would silently drop.
    add_text_child(annotation, 'path', os.path.join(image_dir, stem))

    source = SubElement(annotation, 'source')
    add_text_child(source, 'database', "Unknown")

    size = SubElement(annotation, 'size')
    add_text_child(size, 'width', str(jsonDic['imageWidth']))
    add_text_child(size, 'height', str(jsonDic['imageHeight']))
    # NOTE(review): depth is hard-coded to 3 - presumably all images are
    # 3-channel; confirm against the data set.
    add_text_child(size, 'depth', "3")
    add_text_child(annotation, 'segmented', "0")

    for shape in jsonDic['shapes']:
        if shape["label"] == IGNORED_LABEL:
            continue
        coords = np.array(shape['points'])
        if coords.size == 0:
            # A shape without points has no bounding box; skip it instead of
            # failing on the column indexing below.
            continue

        obj = SubElement(annotation, 'object')
        add_text_child(obj, 'name', shape["label"])
        add_text_child(obj, 'pose', 'Unspecified')
        add_text_child(obj, 'truncated', str(0))
        add_text_child(obj, 'difficult', str(0))

        xs, ys = coords[:, 0], coords[:, 1]
        bndbox = SubElement(obj, 'bndbox')
        # int() truncates fractional coordinates toward zero.
        add_text_child(bndbox, 'xmin', str(int(xs.min())))
        add_text_child(bndbox, 'ymin', str(int(ys.min())))
        add_text_child(bndbox, 'xmax', str(int(xs.max())))
        add_text_child(bndbox, 'ymax', str(int(ys.max())))
    return annotation


def convert_directory(json_dir, image_dir, save_dir):
    """Write one ``<stem>.xml`` into ``save_dir`` for every ``*.json`` file in ``json_dir``.

    Entries whose extension is not ``.json`` are skipped.
    """
    for jsonName in os.listdir(json_dir):
        # splitext keeps dots inside the name ("a.b.json" -> "a.b"), whereas
        # split('.')[0] would cut the name at the first dot.
        stem, extension = os.path.splitext(jsonName)
        if extension.lower() != '.json':
            continue
        print(jsonName)
        readPath = os.path.join(json_dir, jsonName)
        # Open the JSON document; decode explicitly as UTF-8 rather than with
        # the platform default encoding.
        with open(readPath, 'r', encoding='utf-8') as file_loader:
            jsonDic = json.load(file_loader)
        # print(jsonDic.keys())
        # dict_keys(['version', 'flags', 'shapes', 'imagePath', 'imageData', 'imageHeight', 'imageWidth'])
        # Generate the XML document
        tree = ElementTree(build_annotation(jsonDic, stem, image_dir))
        tree.write(os.path.join(save_dir, stem + '.xml'), encoding='utf-8')


if __name__ == '__main__':
    convert_directory(jsonPath, imagePath, savePath)
Beautify:
# -*- coding:utf-8 -*-
import os
from xml.etree import ElementTree # Import ElementTree Module
# element is the Element to format, indent is the string used for one level of
# indentation, and newline is the string used to break lines.
def prettyXml(element, indent, newline, level=0):
    """Indent ``element`` and all of its descendants in place.

    Args:
        element: The Element whose ``text`` and whose children's ``tail`` are
            rewritten.
        indent: String repeated once per nesting level (for example ``'\\t'``).
        newline: String inserted before each indented line (for example
            ``'\\n'``).
        level: Nesting depth of ``element``; 0 for the root.

    Returns:
        None. Elements without children are left untouched.
    """
    children = list(element)
    # Check for children through the child list rather than ``if element:``,
    # because testing the truth value of an Element is deprecated.
    if not children:
        return

    child_pad = newline + indent * (level + 1)
    if element.text is None or element.text.isspace():
        # No real text: the text slot only carries the indentation that
        # precedes the first child.
        element.text = child_pad
    else:
        # Existing text goes on its own line, followed by the indentation of
        # the first child.
        element.text = child_pad + element.text.strip() + child_pad

    last_index = len(children) - 1
    for position, subelement in enumerate(children):
        if position < last_index:
            # The next line starts a sibling, so keep the same indentation.
            subelement.tail = child_pad
        else:
            # The next line closes the parent, so indent one level less.
            subelement.tail = newline + indent * level
        # Recurse into the child elements.
        prettyXml(subelement, indent, newline, level=level + 1)
# Directory whose XML files are re-indented in place.
# (Named xml_dir so that the builtin dir() is not shadowed.)
xml_dir = r'E:\Desktop\SteelCoilsDetection\test\xml'

if __name__ == '__main__':
    for fileName in os.listdir(xml_dir):
        # Only XML files can be parsed; skip any other directory entry.
        if not fileName.lower().endswith('.xml'):
            continue
        print(fileName)
        xml_path = os.path.join(xml_dir, fileName)
        tree = ElementTree.parse(xml_path)  # Parse the XML file
        root = tree.getroot()  # Get the root element (an Element instance)
        prettyXml(root, '\t', '\n')  # Apply the pretty-printing function
        # ElementTree.dump(root)  # Would print the beautified XML content
        tree.write(xml_path, encoding='utf-8')
Supplement: Python Standard Library xml Detailed Explanation
For simple XML parsing, the standard library's xml package can be used. Compared with the third-party library lxml, xml needs no additional installation, but it is implemented in Python and does not perform as well as lxml
XML parsing is mainly handled by the xml.etree.ElementTree module, which contains two classes: ElementTree represents the whole XML document, and Element represents a single node in the document
Sample data, saved as book.xml
<?xml version="1.0"?>
<bookstore>
<book name=" Journey to the West ">
<author> Wu Chengen </author>
<dynasty> Ming Dynasty </dynasty>
<similar name=" Romance of Enchanting the Gods " author=" Xu Zhonglin "/>
</book>
<book name=" Dream of Red Mansions ">
<author> Cao Xueqin </author>
<dynasty> Qing Dynasty </dynasty>
</book>
<book name=" Romance of the Three Kingdoms ">
<author> Luo Guanzhong </author>
<dynasty> Late Ming and early Qing </dynasty>
<similar name="3 National Records " author=" Chen Shou "/>
</book>
</bookstore>
Import the XML document to be parsed and get the root node of the document
import xml.etree.ElementTree as ET
# Parse the sample file into an ElementTree object (the whole document).
tree = ET.parse("./book.xml")
# Fetch the document's root Element.
root = tree.getroot()
You can also parse strings directly
# Read the file's text and parse it from a string; the result is bound to root.
# The explicit encoding keeps decoding independent of the platform default.
with open("./book.xml", encoding="utf-8") as fp:
    root = ET.fromstring(fp.read())
For each Element node:
Direct child nodes can be accessed through the list interface
Attributes can be accessed through the dictionary-style interface (for example, get()), or the actual dictionary can be obtained through the attrib attribute (for example, root.attrib)
In addition, the tag attribute holds the tag name, and text holds the text content of the node
# Iterate over the direct children of the root node
for child in root:
    attributes = child.attrib
    print(child.tag, attributes, child.get("name"))
# Take the second child of the root, then the text of that child's first
# child, i.e. "<author> Cao Xueqin </author>"
second_book = root[1]
author = second_book[0].text
print(type(author), author)
Printout
book {'name': 'Journey to the West'} Journey to the West
book {'name': 'Dream of Red Mansions'} Dream of Red Mansions
book {'name': 'Romance of the Three Kingdoms'} Romance of the Three Kingdoms
<class 'str'> Cao Xueqin
The obtained text result is different from lxml, where the result is directly of string type
The iter() method works recursively and can traverse all descendant nodes
# Walk the whole subtree and visit every node whose tag is "similar"
for similar in root.iter("similar"):
    similar_attributes = similar.attrib
    print(similar_attributes)
Printout
{'name': 'Romance of Enchanting the Gods', 'author': 'Xu Zhonglin'}
{'name': '3 National Records', 'author': 'Chen Shou'}
XPath Syntax
XPath is similar to a file path: the last part of the path indicates the content to be extracted. There are two kinds of separators: "/" selects direct child nodes, and "//" selects all descendant nodes
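Syntax | Meaning |
---|---|
tag | Matches elements with the given tag |
* | Matches all elements |
. | The current node; used for relative paths |
.. | The parent node |
[@attrib] | Matches nodes that have the attribute attrib |
[@attrib='value'] | Matches nodes whose attribute attrib equals value |
[tag] | Matches nodes that have a direct child node named tag |
[tag='text'] | Matches nodes that have a direct child node named tag whose text content is text |
[n] | Matches the n-th node |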
[] must be preceded by a tag name. For example, book[@name][similar] matches book nodes that have a name attribute and a direct child node similar; such an expression is then placed in an XPath path, for example "/bookstore/book[@name][similar]"
XPath syntax can be used through the findall(path) and find(path) methods of an Element object, in which case the path starts from the node represented by that Element; findall and find can also be called on an ElementTree object, which is equivalent to the path starting from the root node
When a node is matched, findall returns a list of all matched nodes, find returns the first matched node, findall returns an empty list when no node is matched, and find returns None
# "." denotes the bookstore node (the root)
# find() returns the first matching node; findtext() returns that node's text directly
author_1 = tree.find("./book[@name=' Dream of Red Mansions ']/author").text
author_2 = tree.findtext("./book[@name=' Dream of Red Mansions ']/author")
# The labels match the documented print results
print("Dream of Red Mansions Author:", author_1, author_2)
author_3 = root.find("./book/similar[@name='3 National Records ']").get("author")
print("3 National Records Author:", author_3)
Print results
Dream of Red Mansions Author: Cao Xueqin Cao Xueqin
3 National Records Author: Chen Shou
findtext is similar to find in that it gets the text content of the node directly
# Select every book node that has a direct child named "similar"
books_1 = root.findall("./book[similar]")
# For direct child nodes the leading "./" can be omitted
books_2 = root.findall("book[similar]")
print(books_1 == books_2)
for book in books_1:
    first_child, second_child = book[0], book[1]
    print(first_child.text, second_child.text)
Print results
True
Wu Chengen Ming Dynasty
Luo Guanzhong Late Ming and early Qing