Apparently the code to forestall Tk eating events was too aggressive (Tk user input...
[python/dscho.git] / Doc / tools / sgmlconv / esistools.py
blob7feeada8dc978642d32856b1b46cab9345bdfa5f
1 """Miscellaneous utility functions useful for dealing with ESIS streams."""
3 import re
4 import string
6 import xml.dom.pulldom
8 import xml.sax
9 import xml.sax.handler
10 import xml.sax.xmlreader
# Matches a maximal run of characters containing no backslash, anchored at
# the start of the string (bound .match method of the compiled pattern).
_data_match = re.compile(r"[^\\][^\\]*").match

# Code-point-to-character conversion: unichr() where it exists (Python 2),
# otherwise chr() (Python 3, where unichr() was removed).
try:
    _unichr = unichr
except NameError:
    _unichr = chr


def decode(s):
    """Return the string `s` with ESIS backslash escapes expanded.

    Recognized escapes:

      \\\\       a single backslash
      \\n       a newline
      \\%N;     the character whose decimal code point is N

    Raises ValueError for any other escape, including a lone backslash
    at the very end of the input.
    """
    pieces = []
    while s:
        m = _data_match(s)
        if m:
            # Plain run of data; copy it through unchanged.
            pieces.append(m.group())
            s = s[m.end():]
            continue
        # No match on a non-empty string means s starts with a backslash.
        # Slicing (rather than indexing) yields '' for a trailing lone
        # backslash, so that case reaches the ValueError below instead of
        # raising IndexError.
        escape = s[1:2]
        if escape == "\\":
            pieces.append("\\")
            s = s[2:]
        elif escape == "n":
            pieces.append("\n")
            s = s[2:]
        elif escape == "%":
            # Numeric character reference terminated by ';'.  A missing
            # ';' or non-numeric N surfaces as ValueError from the
            # unpacking / int() call.
            n, s = s[2:].split(";", 1)
            pieces.append(_unichr(int(n)))
        else:
            raise ValueError("can't handle " + repr(s))
    return "".join(pieces)
# Code-point-to-character conversion: unichr() where it exists (Python 2),
# otherwise chr() (Python 3, where unichr() was removed).
try:
    _unichr = unichr
except NameError:
    _unichr = chr

# Translation table used by encode(): code points 0-255 map to a
# single-character string, except that newline and backslash map to their
# two-character escaped forms.  Characters not in the table cannot be
# encoded.
_charmap = {}
for c in range(128):
    _charmap[chr(c)] = chr(c)
    _charmap[_unichr(c + 128)] = chr(c + 128)
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"
del c

_null_join = ''.join


def encode(s):
    """Return `s` with newlines and backslashes escaped for an ESIS stream.

    Every character of `s` is looked up in `_charmap`.  Raises Exception
    (with the input and the per-character lookup results in the message)
    when some character has no entry in the table.
    """
    # Look each character up once; a missing entry yields None, which makes
    # the join below fail with TypeError.
    encoded = [_charmap.get(ch) for ch in s]
    try:
        return _null_join(encoded)
    except TypeError:
        raise Exception("could not encode %r: %r" % (s, encoded))
class ESISReader(xml.sax.xmlreader.XMLReader):
    """SAX Reader which reads from an ESIS stream.

    No verification of the document structure is performed by the
    reader; a general verifier could be used as the target
    ContentHandler instance.

    Input is supplied incrementally through feed() and finished with
    close(); each complete input line is one ESIS token whose first
    character selects the token type.
    """
    # Handlers installed through setProperty().
    _decl_handler = None
    _lexical_handler = None

    # Identifiers collected from 'p' / 's' tokens until the next 'N' token.
    _public_id = None
    _system_id = None

    _buffer = ""        # trailing partial line carried between feed() calls
    _is_empty = 0       # set by an 'e' token, consumed by the next '(' token
    _lineno = 0         # number of complete lines seen by feed()
    _started = 0        # true once feed() has issued startDocument()

    def __init__(self, contentHandler=None, errorHandler=None):
        """Create a reader, optionally installing content/error handlers."""
        xml.sax.xmlreader.XMLReader.__init__(self)
        # _attrs is shared with _attributes: 'A' tokens fill the dict, the
        # wrapper is handed to startElement(), then the dict is cleared.
        self._attrs = {}
        self._attributes = Attributes(self._attrs)
        self._locator = Locator()
        self._empties = {}
        if contentHandler:
            self.setContentHandler(contentHandler)
        if errorHandler:
            self.setErrorHandler(errorHandler)

    def get_empties(self):
        """Return the names of elements that were flagged as empty."""
        return list(self._empties.keys())

    #  XMLReader interface

    def parse(self, source):
        """Parse an input source in one go.

        Currently disabled: always raises RuntimeError.  Use feed() and
        close() instead.
        """
        # NOTE(review): this unconditional raise makes the rest of the
        # method unreachable.  It is kept because callers currently observe
        # RuntimeError; confirm intent before removing it.
        raise RuntimeError
        self._locator._public_id = source.getPublicId()
        self._locator._system_id = source.getSystemId()
        fp = source.getByteStream()
        handler = self.getContentHandler()
        if handler:
            handler.startDocument()
        lineno = 0
        while 1:
            token, data = self._get_token(fp)
            if token is None:
                break
            lineno = lineno + 1
            self._locator._lineno = lineno
            self._handle_token(token, data)
        handler = self.getContentHandler()
        if handler:
            # The document is finished here, so signal its end (the
            # original called startDocument() a second time).
            handler.endDocument()

    def feed(self, data):
        """Process a chunk of ESIS text.

        Complete lines are dispatched as tokens; a trailing partial line
        is kept and prepended to the data of the next call.  An empty
        line is reported through the error handler's error() method.
        """
        if not self._started:
            handler = self.getContentHandler()
            if handler:
                handler.startDocument()
            self._started = 1
        lines = (self._buffer + data).split("\n")
        # split() always returns at least one item; the last one is the
        # (possibly empty) unterminated tail.  Store it before dispatching
        # so the buffer stays a string even if a handler raises.
        self._buffer = lines.pop()
        for line in lines:
            self._lineno = self._lineno + 1
            self._locator._lineno = self._lineno
            if not line:
                e = xml.sax.SAXParseException(
                    "ESIS input line contains no token type mark",
                    None, self._locator)
                self.getErrorHandler().error(e)
            else:
                self._handle_token(line[0], line[1:])

    def close(self):
        """Signal the end of the document and discard any buffered text."""
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()
        self._buffer = ""

    def _get_token(self, fp):
        """Read one line from `fp` and split it into (token, data).

        Returns (None, None) at end of input, and also after an I/O error
        or an empty line has been reported to the error handler, so the
        result can always be unpacked into two names.
        """
        try:
            line = fp.readline()
        except IOError as e:
            e = xml.sax.SAXException("I/O error reading input stream", e)
            self.getErrorHandler().fatalError(e)
            return None, None
        if not line:
            return None, None
        if line[-1] == "\n":
            line = line[:-1]
        if not line:
            e = xml.sax.SAXParseException(
                "ESIS input line contains no token type mark",
                None, self._locator)
            self.getErrorHandler().error(e)
            return None, None
        return line[0], line[1:]

    def _handle_token(self, token, data):
        """Dispatch one ESIS token to the appropriate handler.

        `token` is the type character and `data` is the rest of the line.
        Unknown token types are reported through the error handler.
        """
        handler = self.getContentHandler()
        if token == '-':
            # character data
            if data and handler:
                handler.characters(decode(data))
        elif token == ')':
            if handler:
                handler.endElement(decode(data))
        elif token == '(':
            if self._is_empty:
                self._empties[data] = 1
            if handler:
                handler.startElement(data, self._attributes)
            # Attributes and the empty flag apply to this element only.
            self._attrs.clear()
            self._is_empty = 0
        elif token == 'A':
            # "name IMPLIED" or "name type value"
            name, value = data.split(' ', 1)
            if value != "IMPLIED":
                attr_type, value = value.split(' ', 1)
                self._attrs[name] = (decode(value), attr_type)
        elif token == '&':
            # entity reference in SAX?
            pass
        elif token == '?':
            if handler:
                if ' ' in data:
                    target, data = data.split(None, 1)
                else:
                    target, data = data, ""
                handler.processingInstruction(target, decode(data))
        elif token == 'N':
            handler = self.getDTDHandler()
            if handler:
                handler.notationDecl(data, self._public_id, self._system_id)
            self._public_id = None
            self._system_id = None
        elif token == 'p':
            self._public_id = decode(data)
        elif token == 's':
            self._system_id = decode(data)
        elif token == 'e':
            self._is_empty = 1
        elif token == 'C':
            pass
        else:
            e = xml.sax.SAXParseException(
                "unknown ESIS token in event stream",
                None, self._locator)
            self.getErrorHandler().error(e)

    def setContentHandler(self, handler):
        """Install `handler`, moving the document locator to it."""
        old = self.getContentHandler()
        if old:
            old.setDocumentLocator(None)
        if handler:
            handler.setDocumentLocator(self._locator)
        xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)

    def getProperty(self, property):
        """Return the lexical or declaration handler.

        Raises SAXNotRecognizedException for any other property name.
        """
        if property == xml.sax.handler.property_lexical_handler:
            return self._lexical_handler

        elif property == xml.sax.handler.property_declaration_handler:
            return self._decl_handler

        else:
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def setProperty(self, property, value):
        """Install the lexical or declaration handler.

        The document locator is removed from the previous handler (if
        any) and given to the new one.  Raises SAXNotRecognizedException
        for any other property name.
        """
        if property == xml.sax.handler.property_lexical_handler:
            if self._lexical_handler:
                self._lexical_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._lexical_handler = value

        elif property == xml.sax.handler.property_declaration_handler:
            if self._decl_handler:
                self._decl_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._decl_handler = value

        else:
            # SAXException requires a message argument; constructing it
            # without one raises TypeError instead of the intended error.
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def getFeature(self, feature):
        """Report the namespaces feature as on; defer others to the base."""
        if feature == xml.sax.handler.feature_namespaces:
            return 1
        else:
            return xml.sax.xmlreader.XMLReader.getFeature(self, feature)

    def setFeature(self, feature, enabled):
        """Accept (and ignore) the namespaces feature; defer others."""
        if feature == xml.sax.handler.feature_namespaces:
            pass
        else:
            xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
class Attributes(xml.sax.xmlreader.AttributesImpl):
    """Attribute collection whose backing dict stores (value, type) pairs.

    self._attrs has the form {name: (value, type)}; the accessors below
    pick the right half of each pair.
    """

    def getType(self, name):
        """Return the declared type of attribute `name`."""
        entry = self._attrs[name]
        return entry[1]

    def getValue(self, name):
        """Return the value of attribute `name`."""
        entry = self._attrs[name]
        return entry[0]

    # Lookup by qualified name and by subscription are the same operation
    # as getValue().
    getValueByQName = getValue
    __getitem__ = getValue

    def get(self, name, default=None):
        """Return the value of `name`, or `default` if it is not present."""
        try:
            entry = self._attrs[name]
        except KeyError:
            return default
        return entry[0]

    def items(self):
        """Return a list of (name, value) pairs."""
        return [(name, value)
                for name, (value, _type) in self._attrs.items()]

    def values(self):
        """Return a list of the attribute values."""
        return [value for value, _type in self._attrs.values()]
class Locator(xml.sax.xmlreader.Locator):
    """Locator exposing a line number plus public and system identifiers.

    The values live in the underscore-prefixed attributes below; the
    class-level defaults apply until an instance attribute is assigned.
    """
    _system_id = None
    _public_id = None
    _lineno = -1

    def getSystemId(self):
        """Return the stored system identifier."""
        return self._system_id

    def getPublicId(self):
        """Return the stored public identifier."""
        return self._public_id

    def getLineNumber(self):
        """Return the stored line number."""
        return self._lineno
def parse(stream_or_string, parser=None):
    """Return a pulldom event stream for an ESIS document.

    `stream_or_string` is either a file name (a byte or unicode string,
    including subclasses of those types), which is opened for reading, or
    an object that is used directly as the input stream.  `parser`
    defaults to a new ESISReader.

    The opened file is handed to the returned DOMEventStream and is not
    closed here.
    """
    # isinstance() also accepts subclasses of the string types, which the
    # exact type() comparison rejected.
    if isinstance(stream_or_string, (type(""), type(u""))):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = ESISReader()
    return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)