Generating XML files with Python and pretty-printing them (example code)

  • 2021-11-01 04:10:09
  • OfStack

Look at the code ~


# -*- coding:utf-8 -*- 
import os
import json
import numpy as np 
#from xml.etree import ElementTree as  etree
from xml.etree.ElementTree import Element
from xml.etree.ElementTree import SubElement
from xml.etree.ElementTree import ElementTree
 
# Source folders (images, JSON annotations) and the destination folder for the XML files.
imagePath = r'E:\Desktop\SteelCoilsDetection\test\images'
jsonPath = r'E:\Desktop\SteelCoilsDetection\test\json'
savePath = r'E:\Desktop\SteelCoilsDetection\test\xml'

# Make sure the destination exists so that tree.write() below cannot fail on a missing folder.
os.makedirs(savePath, exist_ok=True)

jsonList = os.listdir(jsonPath)
for jsonName in jsonList:
    # Anything that is not a JSON annotation cannot be converted; skip it.
    if not jsonName.lower().endswith('.json'):
        continue

    print(jsonName)
    readPath = os.path.join(jsonPath, jsonName)
    # splitext removes only the final extension, so "img.v2.json" keeps the
    # stem "img.v2" instead of being cut down to "img".
    baseName = os.path.splitext(jsonName)[0]

    # Load the annotation. The keys read below are 'imageWidth', 'imageHeight'
    # and 'shapes' (each shape providing 'label' and 'points').
    with open(readPath, 'r', encoding='utf-8') as file_loader:
        jsonDic = json.load(file_loader)

    # Build the XML document (Pascal VOC-style layout).
    annotation = Element('annotation')

    folder = SubElement(annotation, 'folder')
    folder.text = "images"

    filename = SubElement(annotation, 'filename')
    filename.text = baseName

    path = SubElement(annotation, 'path')
    # Join with a path separator; plain string concatenation would glue the
    # file name directly onto the directory name.
    path.text = os.path.join(imagePath, baseName)

    source = SubElement(annotation, 'source')
    database = SubElement(source, 'database')
    database.text = "Unknown"

    size = SubElement(annotation, 'size')
    width = SubElement(size, 'width')
    width.text = str(jsonDic['imageWidth'])
    height = SubElement(size, 'height')
    height.text = str(jsonDic['imageHeight'])
    depth = SubElement(size, 'depth')
    depth.text = "3"  # NOTE(review): hard-coded; assumes 3-channel images - confirm

    segmented = SubElement(annotation, 'segmented')
    segmented.text = "0"

    for shape in jsonDic['shapes']:
        # Shapes labelled 'a' are left out of the XML.
        # NOTE(review): the reason for this filter is not visible here.
        if shape["label"] == 'a':
            continue

        # A shape without points has no bounding box; skip it before any
        # element is created so no half-filled <object> is emitted.
        points = shape['points']
        if not points:
            continue

        # Axis-aligned bounding box of the shape: column 0 holds the x
        # coordinates and column 1 the y coordinates of the points.
        mritx = np.array(points)
        xxmin = mritx[:, 0].min()
        xxmax = mritx[:, 0].max()
        yymin = mritx[:, 1].min()
        yymax = mritx[:, 1].max()

        obj = SubElement(annotation, 'object')
        name = SubElement(obj, 'name')
        name.text = shape["label"]

        pose = SubElement(obj, 'pose')
        pose.text = 'Unspecified'

        truncated = SubElement(obj, 'truncated')
        truncated.text = str(0)

        difficult = SubElement(obj, 'difficult')
        difficult.text = str(0)

        # Coordinates are truncated to integers before being written.
        bndbox = SubElement(obj, 'bndbox')
        xmin = SubElement(bndbox, 'xmin')
        xmin.text = str(int(xxmin))
        ymin = SubElement(bndbox, 'ymin')
        ymin.text = str(int(yymin))
        xmax = SubElement(bndbox, 'xmax')
        xmax.text = str(int(xxmax))
        ymax = SubElement(bndbox, 'ymax')
        ymax.text = str(int(yymax))

    # One XML file per JSON file, sharing the same stem.
    tree = ElementTree(annotation)
    tree.write(os.path.join(savePath, baseName + '.xml'), encoding = 'utf-8')

Beautify:


# -*- coding:utf-8 -*- 
import os
from xml.etree import ElementTree      #  Import ElementTree Module   
def prettyXml(element, indent, newline, level = 0):
    """Indent an element tree in place so it serializes with one tag per line.

    Args:
        element: the Element to reformat; it and all of its descendants are
            modified in place.
        indent: the string used for one level of indentation (e.g. a tab).
        newline: the string used as a line break.
        level: nesting depth of ``element``; the recursion increases it by
            one per level, top-level callers leave it at 0.

    Returns:
        None.

    Only elements that have children get their ``text`` rewritten; the text
    of leaf elements is left exactly as it is.
    """
    # Take the child list once: it is used both for the "has children" check
    # and for the loop below. Checking the list avoids testing the truth
    # value of the Element itself, which is deprecated.
    children = list(element)

    if children:
        text = element.text
        if text is None or not text.strip():
            # No real text: just open a new, indented line for the first child.
            element.text = newline + indent * (level + 1)
        else:
            # Keep the text on its own indented line ahead of the first child.
            element.text = newline + indent * (level + 1) + text.strip() + newline + indent * (level + 1)

    last = len(children) - 1
    for position, subelement in enumerate(children):
        if position < last:
            # A sibling follows, so the next line stays at the children's depth.
            subelement.tail = newline + indent * (level + 1)
        else:
            # The parent's closing tag follows, so the next line is one level shallower.
            subelement.tail = newline + indent * level

        # Apply the same layout to the child's own subtree.
        prettyXml(subelement, indent, newline, level = level + 1)
 
# Folder whose XML files are re-indented; every file is overwritten in place.
xmlDir = r'E:\Desktop\SteelCoilsDetection\test\xml'
for fileName in os.listdir(xmlDir):
    # Only XML files can be parsed; skip anything else found in the folder.
    if not fileName.lower().endswith('.xml'):
        continue

    print(fileName)
    xmlFile = os.path.join(xmlDir, fileName)
    tree = ElementTree.parse(xmlFile)       # parse the file into an ElementTree
    root = tree.getroot()                   # root Element of the document
    prettyXml(root, '\t', '\n')             # tab indentation, "\n" line breaks

    # ElementTree.dump(root) would print the re-indented XML instead of saving it.
    tree.write(xmlFile, encoding = 'utf-8')

Supplement: Python Standard Library xml Detailed Explanation

For simple XML parsing, the standard-library xml package can be used. Compared with the third-party library lxml, xml needs no additional installation, but xml is implemented in Python and its performance is not as good as lxml's.

XML parsing is handled mainly by the xml.etree.ElementTree module, which contains two classes: ElementTree represents the whole XML document, and Element represents a single node in the document.

Sample data, saved as book.xml


<?xml version="1.0"?>
<bookstore>
	<book name=" Journey to the West ">
		<author> Wu Chengen </author>
		<dynasty> Ming Dynasty </dynasty>
		<similar name=" Romance of Enchanting the Gods " author=" Xu Zhonglin "/>
	</book>
	<book name=" Dream of Red Mansions ">
		<author> Cao Xueqin </author>
		<dynasty> Qing Dynasty </dynasty>
	</book>
	<book name="3 Romance of the Kingdom ">
		<author> Luo Guanzhong </author>
		<dynasty> Late Ming and early Qing </dynasty>
		<similar name="3 National Records " author=" Chen Shou "/>
	</book>
</bookstore>

Import the XML document to be parsed and get the root node of the document


# Parse the sample file into an ElementTree, then fetch its root element.
# Both names (tree, root) are used by the snippets that follow.
import xml.etree.ElementTree as ET
tree = ET.parse("./book.xml")
root = tree.getroot()

You can also parse strings directly


# Read the file as bytes so the XML parser determines the encoding itself
# (from the XML declaration, UTF-8 by default) instead of relying on the
# platform's default text encoding.
with open("./book.xml", "rb") as fp:
    root = ET.fromstring(fp.read())

For each Element node:

Direct child nodes can be accessed through the list interface

Attributes can be accessed through a dictionary-like interface, or the actual dictionary can be obtained through the attrib attribute (for example, root.attrib)

Others include the tag attribute for the tag name, and text for the text content it contains


# Iterate over the direct children of the root node
for book in root:
    print(book.tag, book.attrib, book.get("name"))
# Take the second child of the root node, then the text of that child's first
# child, i.e. the content of "<author> Cao Xueqin </author>"
author = root[1][0].text
print(type(author), author)

Printout

book {'name': 'Journey to the West'} Journey to the West

book {'name': 'Dream of Red Mansions'} Dream of Red Mansions

book {'name': 'Romance of the Three Kingdoms'} Romance of the Three Kingdoms

<class 'str'> Cao Xueqin

Unlike with lxml, the text obtained here is directly of type str

The iter() method works recursively and can traverse all descendant nodes


# Recursively select every node whose tag is "similar"
# (the loop variable is called book, but it holds the <similar> elements)
for book in root.iter("similar"):
    print(book.attrib)

Printout

{'name': 'Romance of the Gods', 'author': 'Xu Zhonglin'}

{'name': 'Records of the Three Kingdoms', 'author': 'Chen Shou'}

XPath Syntax

XPath is similar to a file path: the last part of the path indicates the content to be extracted. There are two kinds of separators: "/" denotes a direct child node, and "//" denotes all descendant nodes at any depth

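| Syntax | Meaning |
| --- | --- |
| tag | Matches a specific tag |
| * | Matches all elements |
| . | The current node, used for relative paths |
| .. | The parent node |
| [@attrib] | Matches nodes that have the attribute attrib |
| [@attrib='value'] | Matches nodes whose attribute attrib equals value |
| [tag] | Matches nodes that have a direct child node named tag |
| [tag='text'] | Matches nodes that have a direct child node named tag whose text content is text |
| [n] | Matches the n-th node |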

[] must be preceded by a tag name. For example, book[@name][similar] matches book nodes that have a name attribute and a direct child node named similar; the expression book[@name][similar] is then placed in an XPath path, for example "/bookstore/book[@name][similar]"

The XPath syntax can be used through the findall(path) and find(path) methods of an Element object, in which case the path starts from the node represented by that Element; findall and find can also be called on the ElementTree object, which is equivalent to starting the path from the root node

When a node is matched, findall returns a list of all matched nodes, find returns the first matched node, findall returns an empty list when no node is matched, and find returns None


# "." denotes the bookstore node (the root of the document)
author_1 = tree.find("./book[@name=' Dream of Red Mansions ']/author").text
author_2 = tree.findtext("./book[@name=' Dream of Red Mansions ']/author")
print(" A Dream of Red Mansions :", author_1, author_2)
author_3 = root.find("./book/similar[@name='3 National Records ']").get("author")
print("3 Author of National Records :", author_3)

Print results

Dream of Red Mansions Author: Cao Xueqin Cao Xueqin

3 National Records Author: Chen Shou

findtext is similar to find, except that it returns the text content of the matched node directly


# Select the book nodes that have a direct child node named "similar"
books_1 = root.findall("./book[similar]")
# For direct child nodes, the leading "./" can be omitted
books_2 = root.findall("book[similar]")
print(books_1 == books_2)
# Print the text of the first two children of each matched book
for book in books_1:
    print(book[0].text, book[1].text)

Print results

True

Wu Chengen Ming Dynasty

Luo Guanzhong in the Late Ming and Early Qing Dynasties


Related articles: