用python解析xml的几种方法

发布时间:2019-08-05 15:33:48编辑:auto阅读(1460)

    Xml:

    1. 标签成对出现

    2. 区分大小写

    3. 标签要正确嵌套

    4.开始部分:<?xml version="1.0" encoding="utf-8"?>

    5.只能有一个根节点

    6.节点可以有属性

    DTD:约束XML文件的节点

    Xml解析:

    1. dom(文档对象模型):把解析的xml整个加载到内存,组织成object树。

    w3cDom dom4j

    from xml.dom.minidom import parse
    # Parse the file into an in-memory DOM document tree.
    doc = parse("students.xml")
    # The document's root element.
    root = doc.documentElement
    # Every <student> element found beneath the root.
    students = root.getElementsByTagName("student")
    class Student:
        """Simple record holding one student's name, age, sex and score."""

        def __init__(self, name, age, sex, score):
            # Store the four values exactly as given; no conversion is done.
            self.name, self.age, self.sex, self.score = name, age, sex, score

        def __str__(self):
            """Return the one-line, human-readable form of the record."""
            template = "姓名:{0},年龄:{1},性别:{2},成绩:{3}"
            values = (self.name, self.age, self.sex, self.score)
            return template.format(*values)
    # Build one Student per <student> element. The text of each child tag is
    # read in the same order as the Student constructor's parameters.
    people = []
    for p in students:
        student = Student(*[
            p.getElementsByTagName(tag)[0].childNodes[0].data
            for tag in ("name", "age", "sex", "score")
        ])
        people.append(student)

    # Print every collected record, one per line.
    for p in people:
        print(p)

     

    2. SAX:事件驱动式解析,不会在内存中加载整个文档,只会根据自己编写的事件处理函数保存需要的数据

    from xml.sax import ContentHandler
    from xml.sax import parse
    class Student:
        """Student record whose fields all default to ``None``.

        The defaults let a parser create an empty record first and fill the
        fields in one at a time.
        """

        def __init__(self, name=None, age=None, sex=None, score=None):
            for attr, value in (
                ("name", name),
                ("age", age),
                ("sex", sex),
                ("score", score),
            ):
                setattr(self, attr, value)

        def __str__(self):
            """Return the one-line, human-readable form of the record."""
            parts = (self.name, self.age, self.sex, self.score)
            return "姓名:{0},年龄:{1},性别:{2},成绩:{3}".format(*parts)
    # Module-level accumulator: every completed <student> record is appended here.
    student = []


    class mysax(ContentHandler):
        """SAX handler that builds one record per ``<student>`` element.

        While a ``<student>`` element is open, the text of its ``name``,
        ``age``, ``sex`` and ``score`` children is copied onto the current
        record. When ``</student>`` is reached, the record is appended to the
        module-level ``student`` list.
        """

        # Child tags whose text is stored on the current record.
        _FIELDS = ("name", "age", "sex", "score")

        def __init__(self):
            super().__init__()
            self.tag = None      # name of the element currently open, if any
            self.student = None  # record being filled; None outside <student>

        def startDocument(self):
            """Nothing to set up at the start of the document."""
            pass

        def startElement(self, name, attrs):
            """Remember the opened tag and start a new record on ``<student>``."""
            self.tag = name
            if name == "student":
                self.student = Student()

        def characters(self, content):
            """Add ``content`` to the field named by the currently open tag.

            A parser may deliver the text of one element in several calls
            (for example around entity references), so chunks are concatenated
            instead of overwriting each other. Text that arrives outside a
            ``<student>`` element, or inside a tag that is not a record field,
            is ignored.
            """
            if self.student is None or self.tag not in self._FIELDS:
                return
            so_far = getattr(self.student, self.tag)
            merged = content if so_far is None else so_far + content
            setattr(self.student, self.tag, merged)

        def endElement(self, name):
            """Store the finished record on ``</student>`` and clear the open tag."""
            if name == "student":
                student.append(self.student)
                self.student = None
            # Clearing the tag on every end element keeps the whitespace that
            # sits between elements from being treated as field text.
            self.tag = None

        def endDocument(self):
            """Nothing to clean up at the end of the document."""
            pass
    # Create the handler and stream the file through the SAX parser.
    students = mysax()
    parse("students.xml", handler=students)

    # Print each record the handler appended to the ``student`` list.
    for p in student:
        print(p)

    3. 元素树解析(ElementTree)

    from xml.etree.ElementTree import*
    class Student:
        """Student record with optional fields that default to ``None``."""

        def __init__(self, name=None, age=None, sex=None, score=None):
            self.name = name
            self.age = age
            self.sex = sex
            self.score = score

        def __str__(self):
            """Return the one-line, human-readable form of the record."""
            return "姓名:{0},年龄:{1},性别:{2},成绩:{3}".format(
                *(getattr(self, attr) for attr in ("name", "age", "sex", "score"))
            )
    # Parse the file, then turn each <student> element matched from the root
    # into a Student whose fields are the text of the same-named child tags.
    people = []
    root = parse("students.xml")
    student1 = root.findall("student")
    for p in student1:
        student = Student(**{
            tag: p.find(tag).text
            for tag in ("name", "age", "sex", "score")
        })
        people.append(student)

    # Print every collected record, one per line.
    for p in people:
        print(p)

     

关键字