1 """Miscellaneous utility functions useful for dealing with ESIS streams."""
9 import xml
.sax
.xmlreader
12 _data_match
= re
.compile(r
"[^\\][^\\]*").match
29 n
, s
= s
.split(";", 1)
30 r
= r
+ unichr(int(n
))
32 raise ValueError, "can't handle " + `s`
38 _charmap
[chr(c
)] = chr(c
)
39 _charmap
[unichr(c
+ 128)] = chr(c
+ 128)
40 _charmap
["\n"] = r
"\n"
41 _charmap
["\\"] = r
"\\"
47 return _null_join(map(_charmap
.get
, s
))
49 raise Exception("could not encode %r: %r" % (s
, map(_charmap
.get
, s
)))
52 class ESISReader(xml
.sax
.xmlreader
.XMLReader
):
53 """SAX Reader which reads from an ESIS stream.
55 No verification of the document structure is performed by the
56 reader; a general verifier could be used as the target
57 ContentHandler instance.
61 _lexical_handler
= None
71 def __init__(self
, contentHandler
=None, errorHandler
=None):
72 xml
.sax
.xmlreader
.XMLReader
.__init
__(self
)
74 self
._attributes
= Attributes(self
._attrs
)
75 self
._locator
= Locator()
78 self
.setContentHandler(contentHandler
)
80 self
.setErrorHandler(errorHandler
)
82 def get_empties(self
):
83 return self
._empties
.keys()
89 def parse(self
, source
):
91 self
._locator
._public
_id
= source
.getPublicId()
92 self
._locator
._system
_id
= source
.getSystemId()
93 fp
= source
.getByteStream()
94 handler
= self
.getContentHandler()
96 handler
.startDocument()
99 token
, data
= self
._get
_token
(fp
)
103 self
._locator
._lineno
= lineno
104 self
._handle
_token
(token
, data
)
105 handler
= self
.getContentHandler()
107 handler
.startDocument()
109 def feed(self
, data
):
110 if not self
._started
:
111 handler
= self
.getContentHandler()
113 handler
.startDocument()
115 data
= self
._buffer
+ data
117 lines
= data
.split("\n")
119 for line
in lines
[:-1]:
120 self
._lineno
= self
._lineno
+ 1
121 self
._locator
._lineno
= self
._lineno
123 e
= xml
.sax
.SAXParseException(
124 "ESIS input line contains no token type mark",
126 self
.getErrorHandler().error(e
)
128 self
._handle
_token
(line
[0], line
[1:])
129 self
._buffer
= lines
[-1]
134 handler
= self
.getContentHandler()
136 handler
.endDocument()
139 def _get_token(self
, fp
):
143 e
= SAXException("I/O error reading input stream", e
)
144 self
.getErrorHandler().fatalError(e
)
151 e
= xml
.sax
.SAXParseException(
152 "ESIS input line contains no token type mark",
154 self
.getErrorHandler().error(e
)
156 return line
[0], line
[1:]
158 def _handle_token(self
, token
, data
):
159 handler
= self
.getContentHandler()
162 handler
.characters(decode(data
))
165 handler
.endElement(decode(data
))
168 self
._empties
[data
] = 1
171 handler
.startElement(data
, self
._attributes
)
174 name
, value
= data
.split(' ', 1)
175 if value
!= "IMPLIED":
176 type, value
= value
.split(' ', 1)
177 self
._attrs
[name
] = (decode(value
), type)
179 # entity reference in SAX?
184 target
, data
= data
.split(None, 1)
186 target
, data
= data
, ""
187 handler
.processingInstruction(target
, decode(data
))
189 handler
= self
.getDTDHandler()
191 handler
.notationDecl(data
, self
._public
_id
, self
._system
_id
)
192 self
._public
_id
= None
193 self
._system
_id
= None
195 self
._public
_id
= decode(data
)
197 self
._system
_id
= decode(data
)
203 e
= SAXParseException("unknown ESIS token in event stream",
205 self
.getErrorHandler().error(e
)
207 def setContentHandler(self
, handler
):
208 old
= self
.getContentHandler()
210 old
.setDocumentLocator(None)
212 handler
.setDocumentLocator(self
._locator
)
213 xml
.sax
.xmlreader
.XMLReader
.setContentHandler(self
, handler
)
215 def getProperty(self
, property):
216 if property == xml
.sax
.handler
.property_lexical_handler
:
217 return self
._lexical
_handler
219 elif property == xml
.sax
.handler
.property_declaration_handler
:
220 return self
._decl
_handler
223 raise xml
.sax
.SAXNotRecognizedException("unknown property %s"
226 def setProperty(self
, property, value
):
227 if property == xml
.sax
.handler
.property_lexical_handler
:
228 if self
._lexical
_handler
:
229 self
._lexical
_handler
.setDocumentLocator(None)
231 value
.setDocumentLocator(self
._locator
)
232 self
._lexical
_handler
= value
234 elif property == xml
.sax
.handler
.property_declaration_handler
:
235 if self
._decl
_handler
:
236 self
._decl
_handler
.setDocumentLocator(None)
238 value
.setDocumentLocator(self
._locator
)
239 self
._decl
_handler
= value
242 raise xml
.sax
.SAXNotRecognizedException()
244 def getFeature(self
, feature
):
245 if feature
== xml
.sax
.handler
.feature_namespaces
:
248 return xml
.sax
.xmlreader
.XMLReader
.getFeature(self
, feature
)
250 def setFeature(self
, feature
, enabled
):
251 if feature
== xml
.sax
.handler
.feature_namespaces
:
254 xml
.sax
.xmlreader
.XMLReader
.setFeature(self
, feature
, enabled
)
257 class Attributes(xml
.sax
.xmlreader
.AttributesImpl
):
258 # self._attrs has the form {name: (value, type)}
260 def getType(self
, name
):
261 return self
._attrs
[name
][1]
263 def getValue(self
, name
):
264 return self
._attrs
[name
][0]
266 def getValueByQName(self
, name
):
267 return self
._attrs
[name
][0]
269 def __getitem__(self
, name
):
270 return self
._attrs
[name
][0]
272 def get(self
, name
, default
=None):
273 if self
._attrs
.has_key(name
):
274 return self
._attrs
[name
][0]
279 for name
, (value
, type) in self
._attrs
.items():
280 L
.append((name
, value
))
285 for value
, type in self
._attrs
.values():
290 class Locator(xml
.sax
.xmlreader
.Locator
):
295 def getLineNumber(self
):
298 def getPublicId(self
):
299 return self
._public
_id
301 def getSystemId(self
):
302 return self
._system
_id
305 def parse(stream_or_string
, parser
=None):
306 if type(stream_or_string
) in [type(""), type(u
"")]:
307 stream
= open(stream_or_string
)
309 stream
= stream_or_string
311 parser
= ESISReader()
312 return xml
.dom
.pulldom
.DOMEventStream(stream
, parser
, (2 ** 14) - 20)