3 # $Id: SimpleXMLTreeBuilder.py 1862 2004-06-18 07:31:02Z Fredrik $
5 # A simple XML tree builder, based on Python's xmllib
7 # Note that due to bugs in xmllib, this builder does not fully support
8 # namespaces (unqualified attributes are put in the default namespace,
9 # instead of being left as is). Run this module as a script to find
10 # out if this affects your Python version.
13 # 2001-10-20 fl created
14 # 2002-05-01 fl added namespace support for xmllib
15 # 2002-08-17 fl added xmllib sanity test
17 # Copyright (c) 1999-2004 by Fredrik Lundh. All rights reserved.
19 # fredrik@pythonware.com
20 # http://www.pythonware.com
22 # --------------------------------------------------------------------
23 # The ElementTree toolkit is
25 # Copyright (c) 1999-2004 by Fredrik Lundh
27 # By obtaining, using, and/or copying this software and/or its
28 # associated documentation, you agree that you have read, understood,
29 # and will comply with the following terms and conditions:
31 # Permission to use, copy, modify, and distribute this software and
32 # its associated documentation for any purpose and without fee is
33 # hereby granted, provided that the above copyright notice appears in
34 # all copies, and that both that copyright notice and this permission
35 # notice appear in supporting documentation, and that the name of
36 # Secret Labs AB or the author not be used in advertising or publicity
37 # pertaining to distribution of the software without specific, written
40 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
41 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
42 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
43 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
44 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
45 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
46 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
48 # --------------------------------------------------------------------
51 # Tools to build element trees from XML files, using <b>xmllib</b>.
52 # This module can be used instead of the standard tree builder, for
53 # Python versions where "expat" is not available (such as 1.5.2).
55 # Note that due to bugs in <b>xmllib</b>, the namespace support is
56 # not reliable (you can run the module as a script to find out exactly
57 # how unreliable it is on your Python version).
65 # ElementTree builder for XML source data.
67 # @see elementtree.ElementTree
class TreeBuilder(xmllib.XMLParser):

    ##
    # Creates a builder that turns xmllib parser events into an
    # element structure.
    #
    # @param html If true, the entities defined by the
    #     <b>htmlentitydefs</b> module are added to the entities
    #     known to this parser.

    def __init__(self, html=0):
        self.__builder = ElementTree.TreeBuilder()
        if html:
            import htmlentitydefs
            # No instance attribute named "entitydefs" has been set at
            # this point, so the lookup resolves to a class-level
            # dictionary.  Extend a private copy instead of updating
            # that dictionary in place, so other parser instances are
            # not affected.
            entitydefs = self.entitydefs.copy()
            entitydefs.update(htmlentitydefs.entitydefs)
            self.entitydefs = entitydefs
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        xmllib.XMLParser.feed(self, data)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.

    def close(self):
        # close the parser first, then ask the tree builder for the
        # result
        xmllib.XMLParser.close(self)
        return self.__builder.close()

    ##
    # Passes character data on to the tree builder.
    #
    # @param data Character data.

    def handle_data(self, data):
        self.__builder.data(data)

    # CDATA sections are handled exactly like ordinary character data
    handle_cdata = handle_data

    ##
    # Opens a new element in the tree builder.  The tag and all
    # attribute names are passed through <b>fixname</b> first.
    #
    # @param tag Element name.
    # @param attrs Dictionary mapping attribute names to values.

    def unknown_starttag(self, tag, attrs):
        attrib = {}
        for key, value in attrs.items():
            attrib[fixname(key)] = value
        self.__builder.start(fixname(tag), attrib)

    ##
    # Closes the current element in the tree builder.
    #
    # @param tag Element name.

    def unknown_endtag(self, tag):
        self.__builder.end(fixname(tag))
111 def fixname(name
, split
=string
.split
):
112 # xmllib in 2.0 and later provides limited (and slightly broken)
113 # support for XML namespaces.
116 return "{%s}%s" % tuple(split(name
, " ", 1))
if __name__ == "__main__":
    # sanity check: look for known namespace bugs in xmllib
    parser = TreeBuilder()
    parser.feed(
        "<root xmlns='default'>\n"
        "  <tag attribute='value' />\n"
        "</root>\n"
    )
    tree = parser.close()
    status = []  # one message per problem found
    # check for bugs in the xmllib implementation
    tag = tree.find("{default}tag")
    if tag is None:
        # the child element was not placed in the default namespace
        status.append("namespaces not supported")
    if tag is not None and tag.get("{default}attribute"):
        # an unqualified attribute must not pick up the default namespace
        status.append("default namespace applied to unqualified attribute")
    # report the outcome; single-argument print(...) gives the same
    # output whether print is a statement or a function
    if status:
        print("xmllib doesn't work properly in this Python version:")
        for problem in status:
            print("- " + problem)
    else:
        print("congratulations; no problems found in xmllib")