#books.xml Python & XML Python & HTML December 2001 Jones, Drake Programming Python, 4th Editio...
#books.xml
<catalog> <book isbn="0-596-00128-2"> <title>Python &amp; XML</title> <title>Python &amp; HTML</title> <date>December 2001</date> <author>Jones, Drake</author> </book> <book isbn="0-596-15810-6"> <title>Programming Python, 4th Edition</title> <date>October 2010</date> <author>Lutz</author> </book> <book isbn="0-596-15806-8"> <title>Learning Python, 4th Edition</title> <date>September 2009</date> <author>Lutz</author> </book> <book isbn="0-596-15808-4"> <title>Python Pocket Reference, 4th Edition</title> <date>October 2009</date> <author>Lutz</author> </book> <book isbn="0-596-00797-3"> <title>Python Cookbook, 2nd Edition</title> <date>March 2005</date> <author>Martelli, Ravenscroft, Ascher</author> </book> <book isbn="0-596-10046-9"> <title>Python in a Nutshell, 2nd Edition</title> <date>July 2006</date> <author>Martelli</author> </book> <!-- plus many more Python books that should appear here --> </catalog>
# -*- coding: utf-8 -*-
# Overview: SAX parsing of an XML document is driven by three callbacks
# (element start, character data, element end).  SAX parses linearly, which
# makes it efficient for large XML files.
__author__ = 'hdfs'

import pprint
import xml.sax
import xml.sax.handler


class BookHandler(xml.sax.handler.ContentHandler):
    """Collect the title text of every <book> element, keyed by its ISBN.

    After parsing, ``mapping`` maps each book's ``isbn`` attribute to the
    concatenated text of all <title> elements found inside that book (no
    separator is inserted between multiple titles).
    """

    def __init__(self):
        # Explicit base-class call (instead of super()) so this also works
        # where ContentHandler is an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self.inTitle = False   # True while inside a <title> of a <book>
        self.mapping = {}      # isbn -> accumulated title text
        self.buffer = ""       # title text gathered for the current book
        self.isbn = None       # isbn of the current book; None outside <book>

    def startElement(self, name, attrs):
        """Start a new book record or begin collecting title text.

        Raises KeyError if a <book> element has no ``isbn`` attribute.
        """
        if name == "book":
            # A new book starts: reset the text buffer and remember its key.
            self.buffer = ""
            self.isbn = attrs["isbn"]
        elif name == "title":
            # Only collect titles that belong to a book; a stray <title>
            # outside any <book> has no key to be stored under.
            self.inTitle = self.isbn is not None

    def characters(self, data):
        """Accumulate character data while inside a book's <title>.

        The text is appended to the buffer, so the data of several title
        children of the same book ends up joined together.
        """
        if self.inTitle:
            self.buffer += data

    def endElement(self, name):
        """Store the collected title text or close the current book."""
        if name == "title":
            if self.inTitle:
                self.mapping[self.isbn] = self.buffer
            self.inTitle = False
        elif name == "book":
            # Leaving the book: later titles must not leak into its entry.
            self.isbn = None


if __name__ == "__main__":
    # Run the parse only when executed as a script, so importing this module
    # does not require books.xml to exist.
    parser = xml.sax.make_parser()
    handler = BookHandler()
    parser.setContentHandler(handler)
    parser.parse('books.xml')
    pprint.pprint(handler.mapping)
result:
{u'0-596-00128-2': u'Python & XMLPython & HTML', u'0-596-00797-3': u'Python Cookbook, 2nd Edition', u'0-596-10046-9': u'Python in a Nutshell, 2nd Edition', u'0-596-15806-8': u'Learning Python, 4th Edition', u'0-596-15808-4': u'Python Pocket Reference, 4th Edition', u'0-596-15810-6': u'Programming Python, 4th Edition'}