1 """Miscellaneous utility functions useful for dealing with ESIS streams."""
10 import xml
.sax
.xmlreader
13 _data_match
= re
.compile(r
"[^\\][^\\]*").match
30 n
, s
= s
.split(";", 1)
31 r
= r
+ unichr(int(n
))
33 raise ValueError, "can't handle " + `s`
39 _charmap
[chr(c
)] = chr(c
)
40 _charmap
[unichr(c
+ 128)] = chr(c
+ 128)
41 _charmap
["\n"] = r
"\n"
42 _charmap
["\\"] = r
"\\"
48 return _null_join(map(_charmap
.get
, s
))
50 raise Exception("could not encode %r: %r" % (s
, map(_charmap
.get
, s
)))
53 class ESISReader(xml
.sax
.xmlreader
.XMLReader
):
54 """SAX Reader which reads from an ESIS stream.
56 No verification of the document structure is performed by the
57 reader; a general verifier could be used as the target
58 ContentHandler instance.
62 _lexical_handler
= None
72 def __init__(self
, contentHandler
=None, errorHandler
=None):
73 xml
.sax
.xmlreader
.XMLReader
.__init
__(self
)
75 self
._attributes
= Attributes(self
._attrs
)
76 self
._locator
= Locator()
79 self
.setContentHandler(contentHandler
)
81 self
.setErrorHandler(errorHandler
)
83 def get_empties(self
):
84 return self
._empties
.keys()
90 def parse(self
, source
):
92 self
._locator
._public
_id
= source
.getPublicId()
93 self
._locator
._system
_id
= source
.getSystemId()
94 fp
= source
.getByteStream()
95 handler
= self
.getContentHandler()
97 handler
.startDocument()
100 token
, data
= self
._get
_token
(fp
)
104 self
._locator
._lineno
= lineno
105 self
._handle
_token
(token
, data
)
106 handler
= self
.getContentHandler()
108 handler
.startDocument()
110 def feed(self
, data
):
111 if not self
._started
:
112 handler
= self
.getContentHandler()
114 handler
.startDocument()
116 data
= self
._buffer
+ data
118 lines
= data
.split("\n")
120 for line
in lines
[:-1]:
121 self
._lineno
= self
._lineno
+ 1
122 self
._locator
._lineno
= self
._lineno
124 e
= xml
.sax
.SAXParseException(
125 "ESIS input line contains no token type mark",
127 self
.getErrorHandler().error(e
)
129 self
._handle
_token
(line
[0], line
[1:])
130 self
._buffer
= lines
[-1]
135 handler
= self
.getContentHandler()
137 handler
.endDocument()
140 def _get_token(self
, fp
):
144 e
= SAXException("I/O error reading input stream", e
)
145 self
.getErrorHandler().fatalError(e
)
152 e
= xml
.sax
.SAXParseException(
153 "ESIS input line contains no token type mark",
155 self
.getErrorHandler().error(e
)
157 return line
[0], line
[1:]
159 def _handle_token(self
, token
, data
):
160 handler
= self
.getContentHandler()
163 handler
.characters(decode(data
))
166 handler
.endElement(decode(data
))
169 self
._empties
[data
] = 1
171 handler
.startElement(data
, self
._attributes
)
175 name
, value
= data
.split(' ', 1)
176 if value
!= "IMPLIED":
177 type, value
= value
.split(' ', 1)
178 self
._attrs
[name
] = (decode(value
), type)
180 # entity reference in SAX?
185 target
, data
= string
.split(data
, None, 1)
187 target
, data
= data
, ""
188 handler
.processingInstruction(target
, decode(data
))
190 handler
= self
.getDTDHandler()
192 handler
.notationDecl(data
, self
._public
_id
, self
._system
_id
)
193 self
._public
_id
= None
194 self
._system
_id
= None
196 self
._public
_id
= decode(data
)
198 self
._system
_id
= decode(data
)
204 e
= SAXParseException("unknown ESIS token in event stream",
206 self
.getErrorHandler().error(e
)
208 def setContentHandler(self
, handler
):
209 old
= self
.getContentHandler()
211 old
.setDocumentLocator(None)
213 handler
.setDocumentLocator(self
._locator
)
214 xml
.sax
.xmlreader
.XMLReader
.setContentHandler(self
, handler
)
216 def getProperty(self
, property):
217 if property == xml
.sax
.handler
.property_lexical_handler
:
218 return self
._lexical
_handler
220 elif property == xml
.sax
.handler
.property_declaration_handler
:
221 return self
._decl
_handler
224 raise xml
.sax
.SAXNotRecognizedException("unknown property %s"
227 def setProperty(self
, property, value
):
228 if property == xml
.sax
.handler
.property_lexical_handler
:
229 if self
._lexical
_handler
:
230 self
._lexical
_handler
.setDocumentLocator(None)
232 value
.setDocumentLocator(self
._locator
)
233 self
._lexical
_handler
= value
235 elif property == xml
.sax
.handler
.property_declaration_handler
:
236 if self
._decl
_handler
:
237 self
._decl
_handler
.setDocumentLocator(None)
239 value
.setDocumentLocator(self
._locator
)
240 self
._decl
_handler
= value
243 raise xml
.sax
.SAXNotRecognizedException()
245 def getFeature(self
, feature
):
246 if feature
== xml
.sax
.handler
.feature_namespaces
:
249 return xml
.sax
.xmlreader
.XMLReader
.getFeature(self
, feature
)
251 def setFeature(self
, feature
, enabled
):
252 if feature
== xml
.sax
.handler
.feature_namespaces
:
255 xml
.sax
.xmlreader
.XMLReader
.setFeature(self
, feature
, enabled
)
258 class Attributes(xml
.sax
.xmlreader
.AttributesImpl
):
259 # self._attrs has the form {name: (value, type)}
261 def getType(self
, name
):
262 return self
._attrs
[name
][1]
264 def getValue(self
, name
):
265 return self
._attrs
[name
][0]
267 def getValueByQName(self
, name
):
268 return self
._attrs
[name
][0]
270 def __getitem__(self
, name
):
271 return self
._attrs
[name
][0]
273 def get(self
, name
, default
=None):
274 if self
._attrs
.has_key(name
):
275 return self
._attrs
[name
][0]
280 for name
, (value
, type) in self
._attrs
.items():
281 L
.append((name
, value
))
286 for value
, type in self
._attrs
.values():
291 class Locator(xml
.sax
.xmlreader
.Locator
):
296 def getLineNumber(self
):
299 def getPublicId(self
):
300 return self
._public
_id
302 def getSystemId(self
):
303 return self
._system
_id
306 def parse(stream_or_string
, parser
=None):
307 if type(stream_or_string
) in [type(""), type(u
"")]:
308 stream
= open(stream_or_string
)
310 stream
= stream_or_string
312 parser
= ESISReader()
313 return xml
.dom
.pulldom
.DOMEventStream(stream
, parser
, (2 ** 14) - 20)