# Very simple test - Parse a file and print what happens

# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
7 from xml
.parsers
import expat
9 from test
.test_support
import sortdict
, TestFailed
12 def StartElementHandler(self
, name
, attrs
):
13 print 'Start element:\n\t', repr(name
), sortdict(attrs
)
15 def EndElementHandler(self
, name
):
16 print 'End element:\n\t', repr(name
)
18 def CharacterDataHandler(self
, data
):
21 print 'Character data:'
22 print '\t', repr(data
)
24 def ProcessingInstructionHandler(self
, target
, data
):
25 print 'PI:\n\t', repr(target
), repr(data
)
27 def StartNamespaceDeclHandler(self
, prefix
, uri
):
28 print 'NS decl:\n\t', repr(prefix
), repr(uri
)
30 def EndNamespaceDeclHandler(self
, prefix
):
31 print 'End of NS decl:\n\t', repr(prefix
)
33 def StartCdataSectionHandler(self
):
34 print 'Start of CDATA section'
36 def EndCdataSectionHandler(self
):
37 print 'End of CDATA section'
39 def CommentHandler(self
, text
):
40 print 'Comment:\n\t', repr(text
)
42 def NotationDeclHandler(self
, *args
):
43 name
, base
, sysid
, pubid
= args
44 print 'Notation declared:', args
46 def UnparsedEntityDeclHandler(self
, *args
):
47 entityName
, base
, systemId
, publicId
, notationName
= args
48 print 'Unparsed entity decl:\n\t', args
50 def NotStandaloneHandler(self
, userData
):
51 print 'Not standalone'
54 def ExternalEntityRefHandler(self
, *args
):
55 context
, base
, sysId
, pubId
= args
56 print 'External entity ref:', args
[1:]
def DefaultHandler(self, userData):
    """Handler taking one argument besides self.

    NOTE(review): no body for this method is visible in this view;
    presumably it is a no-op -- confirm against the full file.
    """
def DefaultHandlerExpand(self, userData):
    """Handler taking one argument besides self.

    NOTE(review): no body for this method is visible in this view;
    presumably it is a no-op -- confirm against the full file.
    """
parser = expat.ParserCreate(namespace_separator='!')

# Test getting/setting returns_unicode
# (the checks expect an assigned 2 to read back as 1)
parser.returns_unicode = 0
confirm(parser.returns_unicode == 0)
parser.returns_unicode = 1
confirm(parser.returns_unicode == 1)
parser.returns_unicode = 2
confirm(parser.returns_unicode == 1)
parser.returns_unicode = 0
confirm(parser.returns_unicode == 0)

# Test getting/setting ordered_attributes
parser.ordered_attributes = 0
confirm(parser.ordered_attributes == 0)
parser.ordered_attributes = 1
confirm(parser.ordered_attributes == 1)
parser.ordered_attributes = 2
confirm(parser.ordered_attributes == 1)
parser.ordered_attributes = 0
confirm(parser.ordered_attributes == 0)

# Test getting/setting specified_attributes
parser.specified_attributes = 0
confirm(parser.specified_attributes == 0)
parser.specified_attributes = 1
confirm(parser.specified_attributes == 1)
parser.specified_attributes = 2
confirm(parser.specified_attributes == 1)
parser.specified_attributes = 0
confirm(parser.specified_attributes == 0)
# Handler attribute names; the `for name in HANDLER_NAMES` loops in this file
# copy the attribute of each name from `out` onto a parser.
# NOTE(review): the `HANDLER_NAMES = [` opener and the closing `]` of this
# list are not visible in this view -- confirm against the full file.
'StartElementHandler', 'EndElementHandler',
'CharacterDataHandler', 'ProcessingInstructionHandler',
'UnparsedEntityDeclHandler', 'NotationDeclHandler',
'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
'CommentHandler', 'StartCdataSectionHandler',
'EndCdataSectionHandler',
'DefaultHandler', 'DefaultHandlerExpand',
# Left out of the list on purpose by the original author (reason not stated).
#'NotStandaloneHandler',
'ExternalEntityRefHandler'
# Install each listed attribute of `out` on the parser under the same name.
for name in HANDLER_NAMES:
    callback = getattr(out, name)
    setattr(parser, name, callback)
108 <?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
109 <?xml-stylesheet href="stylesheet.css"?>
110 <!-- comment data -->
111 <!DOCTYPE quotations SYSTEM "quotations.dtd" [
113 <!NOTATION notation SYSTEM "notation.jpeg">
114 <!ENTITY acirc "â">
115 <!ENTITY external_entity SYSTEM "entity.file">
116 <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
120 <root attr1="value1" attr2="value2ὀ">
121 <myns:subelement xmlns:myns="http://www.python.org/namespace">
122 Contents of subelements
124 <sub2><![CDATA[contents of CDATA section]]></sub2>
# Produce UTF-8 output
parser.returns_unicode = 0
# Feed the whole document in a single call; the second argument marks the
# data as final.
parser.Parse(data, 1)
# NOTE(review): the four '** ...' prints read like the body of an error
# branch; the try/except lines that would guard them are not visible in this
# view -- confirm against the full file.
print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Try the parse again, this time producing Unicode output
# A fresh parser is created, so the handlers have to be installed again.
parser = expat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
    setattr(parser, name, getattr(out, name))
parser.Parse(data, 1)
# NOTE(review): the four '** ...' prints read like the body of an error
# branch; the try/except lines that would guard them are not visible in this
# view -- confirm against the full file.
print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Third pass: same document and handlers, but read through ParseFile() from
# a file-like object instead of being passed to Parse() as a string.
parser = expat.ParserCreate(namespace_separator='!')
parser.returns_unicode = 1
for name in HANDLER_NAMES:
    setattr(parser, name, getattr(out, name))
# NOTE(review): `StringIO` must be imported somewhere not visible in this
# view; the name `file` also shadows the Python 2 builtin.
file = StringIO.StringIO(data)
parser.ParseFile(file)
# NOTE(review): the four '** ...' prints read like the body of an error
# branch; the try/except lines that would guard them are not visible in this
# view -- confirm against the full file.
print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
print '** Line', parser.ErrorLineNumber
print '** Column', parser.ErrorColumnNumber
print '** Byte', parser.ErrorByteIndex
# Tests that make sure we get errors when the namespace_separator value
# is illegal, and that we don't for good values:
# NOTE(review): most of the try/except/else scaffolding of this section is
# not visible in this view (only `except ValueError, e:` survives), so the
# statements below are shown flat -- confirm the structure against the full
# file before relying on it.
print "Testing constructor for proper handling of namespace_separator values:"
# None and a one-character string are the values treated as legal here.
expat.ParserCreate(namespace_separator=None)
expat.ParserCreate(namespace_separator=' ')
print "Legal values tested o.k."
# A non-string separator; the messages below show a TypeError is expected.
expat.ParserCreate(namespace_separator=42)
print "Caught expected TypeError:"
print "Failed to catch expected TypeError."
# A multi-character separator; a ValueError is expected.
expat.ParserCreate(namespace_separator='too long')
except ValueError, e:
print "Caught expected ValueError:"
print "Failed to catch expected ValueError."
# ParserCreate() needs to accept a namespace_separator of zero length
# to satisfy the requirements of RDF applications that are required
# to simply glue together the namespace URI and the localname. Though
# considered a wart of the RDF specifications, it needs to be supported.
# See XML-SIG mailing list thread starting with
# http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
expat.ParserCreate(namespace_separator='') # too short
# Test the interning machinery.
# NOTE(review): several lines of this section are not visible in this view:
# the definition of `L`, the body of collector(), and the conditions that
# guard the two prints -- confirm against the full file.
p = expat.ParserCreate()
def collector(name, *args):
    """Callback used for both start and end element events.

    NOTE(review): body not visible here; presumably it records `name`
    in `L` -- confirm.
    """
p.StartElementHandler = collector
p.EndElementHandler = collector
# Three <e> elements give three start and three end events.
p.Parse("<e> <e/> <e></e> </e>", 1)
print "L should only contain 6 entries; found", len(L)
# The trailing comma suppresses the newline; the rest of this message is
# presumably printed by a following line that is not visible here.
print "expected L to contain many references to the same string",
222 # Tests of the buffer_text attribute.
def __init__(self, parser):
    """Create a collector for the given parser.

    NOTE(review): the body is not visible in this view. The other methods
    append to `self.stuff`, so presumably it is initialised here as an
    empty list -- confirm against the full file.
    """
def check(self, expected, label):
    """Require that the collected items equal `expected`.

    On mismatch the failure message is `label` followed by the repr of the
    collected items and of `expected` mapped through unicode().
    """
    collected = self.stuff
    matches = collected == expected
    wanted = map(unicode, expected)
    message = "%s\nstuff = %s\nexpected = %s" % (
        label, repr(collected), repr(wanted))
    require(matches, message)
def CharacterDataHandler(self, text):
    """Record one chunk of character data, unchanged."""
    collected = self.stuff
    collected.append(text)
def StartElementHandler(self, name, attrs):
    """Record the start tag as "<name>" and adjust text buffering.

    NOTE(review): the conditions that choose between the two buffer_text
    assignments are not visible in this view. The test data uses
    buffer-text='yes' / 'no', so presumably they select 1 and 0
    respectively -- confirm against the full file.
    """
    self.stuff.append("<%s>" % name)
    bt = attrs.get("buffer-text")
    # `parser` is neither a parameter nor an attribute of self here; it is
    # looked up in an enclosing scope.
    parser.buffer_text = 1
    parser.buffer_text = 0
def EndElementHandler(self, name):
    """Record the end tag as "</name>"."""
    closing_tag = "</%s>" % name
    self.stuff.append(closing_tag)
def CommentHandler(self, data):
    """Record a comment as "<!--data-->"."""
    rendered = "<!--%s-->" % data
    self.stuff.append(rendered)
def require(cond, label):
    """Raise TestFailed(label) unless `cond` is true.

    cond  -- the condition that must hold
    label -- message carried by the TestFailed exception
    """
    # similar to confirm(), but no extraneous output
    # Without this guard the function would raise on every call and never
    # look at `cond`.
    if not cond:
        raise TestFailed(label)
def setup(handlers=()):
    """Create a parser with text buffering on and a TextCollector attached.

    handlers -- iterable of handler attribute names; for each one the
                collector's method of that name is installed on the parser.
                CharacterDataHandler is always installed.

    Returns the pair (parser, handler).
    Raises TestFailed (through require) if a freshly created parser already
    has buffer_text enabled.
    """
    # The default is an immutable tuple rather than a shared list object.
    parser = expat.ParserCreate()
    require(not parser.buffer_text,
            "buffer_text not disabled by default")
    parser.buffer_text = 1
    handler = TextCollector(parser)
    parser.CharacterDataHandler = handler.CharacterDataHandler
    for name in handlers:
        setattr(parser, name, getattr(handler, name))
    return parser, handler
# Each scenario below builds a fresh parser/collector pair with setup(),
# parses one small document and compares what the collector recorded.

# Only character data is collected: the text around the child elements is
# expected to arrive as one chunk.
parser, handler = setup()
require(parser.buffer_text,
        "text buffering either not acknowledged or not enabled")
parser.Parse("<a>1<b/>2<c/>3</a>", 1)
handler.check(["123"],
              "buffered text not properly collapsed")

# XXX This test exposes more detail of Expat's text chunking than we
# XXX like, but it tests what we need to concisely.
parser, handler = setup(["StartElementHandler"])
parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
handler.check(["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
              "buffering control not reacting as expected")

# NOTE(review): as written, `<2>` is not well-formed XML. The expected text
# "1<2> \n 3" suggests the input originally used character/entity references
# that were decoded in this view -- confirm against the full file.
parser, handler = setup()
parser.Parse("<a>1<b/><2><c/> \n 3</a>", 1)
handler.check(["1<2> \n 3"],
              "buffered text not properly collapsed")

# With a start-element handler installed, text is expected to be split at
# every start tag.
parser, handler = setup(["StartElementHandler"])
parser.Parse("<a>1<b/>2<c/>3</a>", 1)
handler.check(["<a>", "1", "<b>", "2", "<c>", "3"],
              "buffered text not properly split")

# Character data handler removed: only the tags are expected.
parser, handler = setup(["StartElementHandler", "EndElementHandler"])
parser.CharacterDataHandler = None
parser.Parse("<a>1<b/>2<c/>3</a>", 1)
# NOTE(review): the label argument and closing parenthesis of this call are
# not visible in this view.
handler.check(["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"],

parser, handler = setup(["StartElementHandler", "EndElementHandler"])
parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
# NOTE(review): the label argument and closing parenthesis of this call are
# not visible in this view.
handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"],

# Comment handler installed, but the document contains no comments.
parser, handler = setup(["CommentHandler", "EndElementHandler",
                         "StartElementHandler"])
parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
              "buffered text not properly split")

# Comments are expected to split the surrounding text as well.
parser, handler = setup(["CommentHandler", "EndElementHandler",
                         "StartElementHandler"])
parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
handler.check(["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
               "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
              "buffered text not properly split")