Apparently the code to forestall Tk eating events was too aggressive (Tk user input...
[python/dscho.git] / Doc / tools / sgmlconv / esis2sgml.py
blob7bda92962252a6be49af0784f84dcffafb14b299
#! /usr/bin/env python

"""Convert ESIS events to SGML or XML markup.

This is limited, but seems sufficient for the ESIS generated by the
latex2esis.py script when run over the Python documentation.
"""

# This should have an explicit option to indicate whether the *INPUT* was
# generated from an SGML or an XML application.
12 import errno
13 import esistools
14 import os
15 import re
16 import string
18 from xml.sax.saxutils import escape
# Default set of element names whose end tags are omitted in SGML output;
# main() uses this unless an autoclose option is given.
AUTOCLOSE = ()

# File that dump_empty_element_names() merges the empty-element names into.
EMPTIES_FILENAME = "../sgml/empties.dat"
# When true, convert() dumps the empty-element names it saw (main() sets
# this for the -e option).
LIST_EMPTIES = 0
# Name maps keyed by the case-normalized name; each value is the spelling
# to use on output.
_elem_map = {}
_attr_map = {}
_token_map = {}

# Function applied to a name before it is looked up in a map.  The default
# leaves the name unchanged; main() replaces it with a lower-casing function
# when name maps are supplied on the command line.
_normalize_case = str


def map_gi(sgmlgi, map):
    """Return the output spelling of the name `sgmlgi` according to `map`.

    The name is normalized with `_normalize_case` and looked up in `map`.
    A name that is not present yet is recorded under its normalized form
    and returned unchanged, so the first spelling seen wins.
    """
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # A dict lookup miss raises KeyError (not IndexError).
        map[uncased] = sgmlgi
        return sgmlgi
def null_map_gi(sgmlgi, map):
    """Return `sgmlgi` unchanged, ignoring `map`.

    Drop-in replacement for map_gi(); main() installs it when no name maps
    were requested.
    """
    return sgmlgi
def format_attrs(attrs, xml=0):
    """Return the attributes in `attrs` formatted for use in a start tag.

    `attrs` maps attribute names to string values.  Attributes are emitted
    in sorted name order, each preceded by a single space, so the result can
    be appended directly after the element name.  An empty mapping gives "".

    With `xml` true every attribute is written as name="value".  Otherwise
    SGML minimization is applied: a value equal to its own name (and a valid
    name token) or equal to "no" + name is written bare, other token values
    are written unquoted as name=value, and anything else is quoted.
    """
    items = list(attrs.items())
    items.sort()
    parts = []
    for name, value in items:
        # Quoted values are delimited by double quotes, so a double quote
        # inside the value has to be escaped along with &, < and >.
        quoted = '%s="%s"' % (name, escape(value, {'"': "&quot;"}))
        if xml:
            parts.append(quoted)
        # this is a little bogus, but should do for now
        elif name == value and isnmtoken(value):
            parts.append(value)
        elif istoken(value):
            if value == "no" + name:
                parts.append(value)
            else:
                parts.append("%s=%s" % (name, value))
        else:
            parts.append(quoted)
    if parts:
        # The empty first item yields the leading space after joining.
        parts.insert(0, '')
    return " ".join(parts)
# A letter followed by letters, digits, '-', '.' or '_'.  \Z anchors at the
# very end of the string; '$' would also accept a trailing newline.
_nmtoken_rx = re.compile(r"[a-z][-._a-z0-9]*\Z", re.IGNORECASE)


def isnmtoken(s):
    """Return true if the whole of `s` matches the name-token pattern."""
    return _nmtoken_rx.match(s) is not None
# A letter or digit followed by letters, digits, '-', '.' or '_'.  \Z anchors
# at the very end of the string; '$' would also accept a trailing newline.
_token_rx = re.compile(r"[a-z0-9][-._a-z0-9]*\Z", re.IGNORECASE)


def istoken(s):
    """Return true if the whole of `s` matches the token pattern."""
    return _token_rx.match(s) is not None
def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    """Read ESIS events from `ifp` and write the equivalent markup to `ofp`.

    Each input line is one event; its first character selects the kind:

      "-"  character data (decoded, escaped, written)
      "("  element start ("COMMENT" opens a markup comment instead)
      ")"  element end   ("COMMENT" closes a markup comment instead)
      "A"  attribute for the next element start
      "e"  marks the next element start as empty
      "&"  entity reference

    `xml` selects XML output (empty elements as <x/>, every other element
    closed explicitly) instead of SGML output, where end tags are dropped
    for known-empty elements and for names in `autoclose`, and shortened
    to "</>" when they close the element that was opened last.
    `verbatims` lists element names whose character data must not have
    "---" replaced by an em dash entity.

    Raises RuntimeError for an unrecognized event character.  When the
    module-level LIST_EMPTIES flag is set, the names of the empty elements
    seen are passed to dump_empty_element_names() at the end.
    """
    if xml:
        # End tags are never omitted in XML output.
        autoclose = ()
    attrs = {}
    lastopened = None
    knownempties = []
    knownempty = 0
    lastempty = 0
    inverbatim = 0
    # iter() with a sentinel calls readline() until it returns "" (EOF).
    for line in iter(ifp.readline, ""):
        event = line[0]
        data = line[1:]
        if data and data[-1] == "\n":
            data = data[:-1]
        if event == "-":
            data = esistools.decode(data)
            data = escape(data)
            if not inverbatim:
                # Use the entity reference, not a literal dash character:
                # it keeps both this source and the output plain ASCII.
                data = data.replace("---", "&mdash;")
            ofp.write(data)
            if "\n" in data:
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif event == "(":
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            data = map_gi(data, _elem_map)
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            # Pending attributes belong to this start tag only.
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
            inverbatim = data in verbatims
        elif event == ")":
            if data == "COMMENT":
                ofp.write("-->")
                continue
            data = map_gi(data, _elem_map)
            if xml:
                # An empty element was already closed by its "<x/>" tag.
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if data in autoclose:
                    pass
                elif lastopened == data:
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
            inverbatim = 0
        elif event == "A":
            fields = data.split(" ", 2)
            if len(fields) < 3:
                # No value field (e.g. "name IMPLIED"): nothing to emit.
                continue
            name, _kind, value = fields
            name = map_gi(name, _attr_map)
            attrs[name] = esistools.decode(value)
        elif event == "e":
            knownempty = 1
        elif event == "&":
            ofp.write("&%s;" % data)
            knownempty = 0
        else:
            raise RuntimeError(
                "unrecognized ESIS event type: '%s'" % event)

    if LIST_EMPTIES:
        dump_empty_element_names(knownempties)
def dump_empty_element_names(knownempties, filename=None):
    """Merge the names in `knownempties` into the empties file.

    Names already listed in the file (one per line, blank lines ignored)
    are kept; the file is then rewritten with the combined names in sorted
    order, one per line.  `filename` defaults to EMPTIES_FILENAME.  The
    `knownempties` sequence itself is not modified.
    """
    if filename is None:
        filename = EMPTIES_FILENAME
    names = {}
    for gi in knownempties:
        names[gi] = gi
    if os.path.isfile(filename):
        fp = open(filename)
        try:
            for line in fp.readlines():
                gi = line.strip()
                if gi:
                    names[gi] = gi
        finally:
            # Release the read handle before the file is reopened to write.
            fp.close()
    gilist = list(names.keys())
    gilist.sort()
    fp = open(filename, "w")
    try:
        fp.write("\n".join(gilist))
        fp.write("\n")
    finally:
        fp.close()
def update_gi_map(map, names, fromsgml=1):
    """Add `names` to the name map `map`.

    `names` is either a comma-separated string or a sequence of names;
    empty names are ignored.  Each name is stored as the output spelling
    under its lower-cased form when `fromsgml` is true, and under the name
    itself otherwise.
    """
    if hasattr(names, "split"):
        # A string: break it into the individual names.
        names = names.split(",")
    for name in names:
        if not name:
            continue
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name
def main():
    """Command-line entry point.

    Options:
      -a NAMES, --autoclose=NAMES  comma-separated elements whose end tags
                                   are omitted in SGML output
      -d, --declare                write an XML declaration first (XML only)
      -e                           record the names of empty elements
      -s, --sgml                   produce SGML
      -x, --xml                    produce XML (the default)
      --elements-map=NAMES, --attributes-map=NAMES, --values-map=NAMES
                                   comma-separated names giving the output
                                   spelling; may be repeated

    With no arguments the conversion runs from stdin to stdout; one
    argument names the input file, a second names the output file.  Exits
    with status 2 on a usage error.
    """
    import getopt
    import sys

    global LIST_EMPTIES, _normalize_case, map_gi

    usage_text = ("usage: %s [-a names] [-d] [-e] [-s | -x]"
                  " [--elements-map=names] [--attributes-map=names]"
                  " [--values-map=names] [infile [outfile]]\n"
                  % sys.argv[0])

    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    elem_names = []
    attr_names = []
    value_names = []
    verbatims = ('verbatim', 'interactive-session')
    try:
        # "a:" and "attributes-map=": both options take an argument.
        opts, args = getopt.getopt(sys.argv[1:], "a:desx",
                                   ["autoclose=", "declare", "sgml", "xml",
                                    "elements-map=", "attributes-map=",
                                    "values-map="])
    except getopt.error:
        sys.stderr.write("%s\n" % sys.exc_info()[1])
        sys.stderr.write(usage_text)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
        elif opt == "--elements-map":
            elem_names.append(arg)
        elif opt == "--attributes-map":
            attr_names.append(arg)
        elif opt == "--values-map":
            value_names.append(arg)

    # open input streams:

    if len(args) == 0:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(args) == 1:
        ifp = open(args[0])
        ofp = sys.stdout
    elif len(args) == 2:
        ifp = open(args[0])
        ofp = open(args[1], "w")
    else:
        sys.stderr.write(usage_text)
        sys.exit(2)

    # setup the name maps:

    if elem_names or attr_names or value_names:
        # assume the origin was SGML; ignore case of the names from the ESIS
        # stream but set up conversion tables to get the case right on output
        _normalize_case = str.lower
        # Each map receives one comma-separated string of all names given.
        if elem_names:
            update_gi_map(_elem_map, ",".join(elem_names))
        if attr_names:
            update_gi_map(_attr_map, ",".join(attr_names))
        if value_names:
            update_gi_map(_token_map, ",".join(value_names))
    else:
        # No maps requested: skip the lookups altogether.
        map_gi = null_map_gi

    # run the conversion:

    try:
        if xml and xmldecl:
            ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
        convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
    except IOError:
        # sys.exc_info() works on every Python version, unlike the
        # "except X, e" / "except X as e" spellings.
        err = sys.exc_info()[1]
        # A closed pipe on output (e.g. "| head") is not an error.
        if err.errno != errno.EPIPE:
            raise


if __name__ == "__main__":
    main()