首先下载lxml(http://www.lfd.uci.edu/~gohlke/pythonlibs/),然后添加引用:
from lxml import _elementpath as DONTUSE
from lxml import etree
具体示例:
1.添加命名空间
# Set the namespace: map the "xsi" prefix to the XML Schema instance URI.
nsmap = {"xsi": "http://www.w3.org/2001/XMLSchema-instance"}
# Create the root element with that prefix mapping attached.
g_statisticsRoot = etree.Element("DcmStatistics", nsmap=nsmap)
2.添加xml schema引用
# Add the XSD reference: set xsi:noNamespaceSchemaLocation on the root element
# using the fully qualified "{namespace-uri}name" attribute form.
g_statisticsRoot.set(
    "{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation",
    "DcmStatistics.xsd",
)
3.添加注释
# Add a comment; addprevious() inserts it in front of the root node.
comment = etree.Comment("create by jiangong.li")
g_statisticsRoot.addprevious(comment)
4.尝试多种编码来解析xml
def decodingXml(xmlFile, encodings=("utf-8", "gb18030")):
    """Parse ``xmlFile``, trying each candidate encoding in turn.

    Args:
        xmlFile: The source handed to ``etree.parse``.
        encodings: Encodings to try, in order. The default reproduces the
            original behaviour: UTF-8 first, then GB18030.

    Returns:
        The tree returned by the first ``etree.parse`` call that succeeds, or
        ``None`` (after printing an error message) when every attempt fails.
    """
    for encoding in encodings:
        try:
            parser = etree.XMLParser(
                remove_blank_text=True,
                encoding=encoding,
                remove_comments=False,
            )
            return etree.parse(xmlFile, parser)
        except Exception:
            # Deliberately broad: any failure with this encoding simply moves
            # on to the next candidate, as the original retry loop did.
            continue

    print("\nPAR XML ERROR, decoding error.")
    return None
5.遍历xml下的所有子节点,不止直属第一级子节点. iter()
# iter() walks the whole tree below root, not only its direct children.
for element in root.iter():
    # Discard the text that trails each element's closing tag.
    element.tail = None
6.遍历xml下的第一级子节点. iterchildren()
# iterchildren() visits only the first-level children of srcParentNode.
for e in srcParentNode.iterchildren():
    if e is srcParentNode:
        continue
    name = ""
    # Statistics node: map the tag to its display name.
    if e.tag == "element":
        name = "Element"
    elif e.tag == "sequence":
        name = "Sequence"
    elif e.tag == "item":
        name = "Item"
    else:
        print("\nUnsupported element type: %s\n" % (e.tag))
        name = e.tag
        # Only parse element/sequence/item; skip everything else.
        continue
7.添加子节点到尾部. append()
def getXmlElement(nodeName, parentNode):
    """Return the child of ``parentNode`` matching ``nodeName``, creating it if absent.

    Args:
        nodeName: Name appended to ``'./'`` to form the XPath query; also the
            tag of the element created when nothing matches.
        parentNode: Element that is searched and, if needed, appended to.

    Returns:
        The first node matched by the XPath query, or a newly created element
        that has been appended to the end of ``parentNode``.

    Raises:
        Exception: If ``parentNode`` is ``None``.
    """
    if parentNode is None:
        raise Exception("parent node is None")

    nodes = parentNode.xpath('./' + nodeName)
    if nodes:
        return nodes[0]

    # No match: create the element and append it as the last child.
    node = etree.Element(nodeName)
    parentNode.append(node)
    return node
8.格式化成str输出
# Serialize the tree rooted at g_statisticsRoot as UTF-8 with an XML
# declaration, pretty-printed, keeping comments.
etree.tostring(
    g_statisticsRoot,
    encoding="UTF-8",
    xml_declaration=True,
    pretty_print=True,
    with_comments=True,
)
9.保存成xml文件
# Write the serialized document to g_xmlName. The file is opened in binary
# mode because the serialized output is written as-is; the with-block flushes
# and closes the file even if serialization or the write raises.
with open(g_xmlName, "wb") as statisticsResult:
    statisticsResult.write(
        etree.tostring(
            g_statisticsRoot,
            encoding="UTF-8",
            xml_declaration=True,
            pretty_print=True,
            with_comments=True,
        )
    )