Files for 2.1b1 distribution.
[python/dscho.git] / Doc / tools / sgmlconv / esis2sgml.py
blobadb887312a0790e8af3e637f1f8d3d3095551159
1 #! /usr/bin/env python
3 """Convert ESIS events to SGML or XML markup.
5 This is limited, but seems sufficient for the ESIS generated by the
6 latex2esis.py script when run over the Python documentation.
7 """
9 # This should have an explicit option to indicate whether the *INPUT* was
10 # generated from an SGML or an XML application.
12 __version__ = '$Revision$'
14 import errno
15 import esistools
16 import os
17 import re
18 import string
20 from xml.utils import escape
# Default set of element names whose end tags convert() leaves out of SGML
# output; main() replaces it when --autoclose is given.
AUTOCLOSE = ()

# File that dump_empty_element_names() merges the known-empty element names
# into, and the flag (set by the -e option) that makes convert() call it.
EMPTIES_FILENAME = "../sgml/empties.dat"
LIST_EMPTIES = 0

# Name maps consulted through map_gi(): each maps a case-normalized name to
# the spelling that should appear in the output.
_elem_map = {}
_attr_map = {}
_token_map = {}
# Function applied to a name before it is looked up in one of the name maps.
# main() swaps in a lower-casing function when name maps are supplied.
_normalize_case = str


def map_gi(sgmlgi, map):
    """Return the output spelling for the name *sgmlgi*.

    The name is normalized with _normalize_case() and looked up in *map*.
    If it is not there yet, the spelling passed in is recorded under the
    normalized key and returned, so the first spelling seen wins.

    sgmlgi -- element or attribute name as it appeared in the ESIS stream.
    map -- dictionary from normalized name to output spelling; updated in
           place when the name is new.
    """
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # A missing dictionary key raises KeyError, not IndexError; catching
        # the wrong exception made every first lookup of a name blow up.
        map[uncased] = sgmlgi
        return sgmlgi
def null_map_gi(sgmlgi, map):
    """Identity stand-in for map_gi(): hand *sgmlgi* back untouched.

    *map* is accepted so the signature matches map_gi(), but it is neither
    read nor modified.
    """
    unchanged = sgmlgi
    return unchanged
def format_attrs(attrs, xml=0):
    """Render an attribute dictionary as the text that follows a tag name.

    Attributes are emitted in sorted order, each preceded by a single
    space; an empty dictionary yields an empty string.

    attrs -- dictionary mapping attribute names to string values.
    xml -- when true every attribute is written as name="escaped value";
           otherwise the SGML minimized forms below are used where possible.
    """
    pairs = list(attrs.items())
    pairs.sort()
    rendered = []
    for name, value in pairs:
        if xml:
            rendered.append('%s="%s"' % (name, escape(value)))
            continue
        # this is a little bogus, but should do for now
        if name == value and isnmtoken(value):
            # attribute whose value is its own name: write the bare value
            rendered.append(value)
        elif not istoken(value):
            # anything that is not a simple token has to be quoted
            rendered.append('%s="%s"' % (name, escape(value)))
        elif value == "no" + name:
            # "no<name>" values are also written bare
            rendered.append(value)
        else:
            rendered.append("%s=%s" % (name, value))
    # Prefixing every item with a space is the same as joining on spaces
    # behind one leading empty item.
    return "".join([" " + item for item in rendered])
# A letter followed by any run of letters, digits, ".", "-" and "_",
# matched without regard to case.
_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)


def isnmtoken(s):
    """Return true if the string *s* as a whole matches _nmtoken_rx."""
    found = _nmtoken_rx.match(s)
    return found is not None
# Like _nmtoken_rx, except that the first character may also be a digit.
_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE)


def istoken(s):
    """Return true if the string *s* as a whole matches _token_rx."""
    found = _token_rx.match(s)
    return found is not None
def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    """Translate a stream of ESIS events into SGML or XML markup.

    Events are read from *ifp* one line at a time; the first character of
    each line is the event code and the rest is its data:

        "-"  character data        "("  start of an element
        ")"  end of an element     "A"  attribute for the next element
        "e"  next element is empty "&"  entity reference

    An element named COMMENT is written as an SGML/XML comment.

    ifp -- input file object; read with readline().
    ofp -- output file object; markup is written with write().
    xml -- true to write XML (explicit end tags, "<gi/>" for elements
           flagged empty); false to write SGML, where end tags of empty and
           autoclosed elements are omitted and the end tag of the element
           opened last is shortened to "</>".
    autoclose -- element names whose end tags are left out of SGML output;
                 ignored when *xml* is true.
    verbatims -- element names directly inside which "---" is left alone
                 instead of being turned into an em-dash entity.

    Raises RuntimeError when a line starts with an unknown event code.
    If the module-level LIST_EMPTIES flag is set, the names of the empty
    elements seen are passed to dump_empty_element_names() at the end.
    """
    if xml:
        autoclose = ()
    attrs = {}
    lastopened = None
    knownempties = []
    knownempty = 0
    lastempty = 0
    inverbatim = 0
    while 1:
        line = ifp.readline()
        if not line:
            break

        # "event" rather than "type": the old name shadowed the builtin and
        # was rebound while unpacking attribute events.
        event = line[0]
        data = line[1:]
        if data and data[-1] == "\n":
            data = data[:-1]
        if event == "-":
            data = esistools.decode(data)
            data = escape(data)
            if not inverbatim:
                # The data is already escaped, so the dash is inserted as an
                # entity reference.  A literal em dash cannot be stored in a
                # source file without an encoding declaration, nor
                # represented in the iso8859-1 output that main() declares.
                data = data.replace("---", "&mdash;")
            ofp.write(data)
            if "\n" in data:
                # the short "</>" end tag is only used on the same line
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif event == "(":
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            data = map_gi(data, _elem_map)
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
            inverbatim = data in verbatims
        elif event == ")":
            if data == "COMMENT":
                ofp.write("-->")
                continue
            data = map_gi(data, _elem_map)
            if xml:
                # an empty element was already closed by its "<gi/>" tag
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if data in autoclose:
                    pass
                elif lastopened == data:
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
            inverbatim = 0
        elif event == "A":
            # data is "name declared-type value"; the declared type is not
            # needed to write the attribute back out.
            name, _decltype, value = data.split(" ", 2)
            name = map_gi(name, _attr_map)
            attrs[name] = esistools.decode(value)
        elif event == "e":
            knownempty = 1
        elif event == "&":
            ofp.write("&%s;" % data)
            knownempty = 0
        else:
            raise RuntimeError(
                "unrecognized ESIS event type: '%s'" % event)

    if LIST_EMPTIES:
        dump_empty_element_names(knownempties)
def dump_empty_element_names(knownempties):
    """Merge *knownempties* into the list stored in EMPTIES_FILENAME.

    The names already in the file (one per line, blank lines ignored) are
    combined with *knownempties*, and the file is rewritten with the sorted
    union, one name per line.  The file is created if it does not exist.

    knownempties -- sequence of element names; it is not modified.
    """
    names = {}
    for gi in knownempties:
        names[gi] = gi
    if os.path.isfile(EMPTIES_FILENAME):
        fp = open(EMPTIES_FILENAME)
        try:
            for line in fp.readlines():
                gi = line.strip()
                if gi:
                    names[gi] = gi
        finally:
            # close the read handle before the same file is truncated below
            fp.close()
    gilist = list(names.keys())
    gilist.sort()
    fp = open(EMPTIES_FILENAME, "w")
    try:
        fp.write("\n".join(gilist))
        fp.write("\n")
    finally:
        fp.close()
def update_gi_map(map, names, fromsgml=1):
    """Record the output spelling of each name in *names* in *map*.

    map -- dictionary to update in place; each name is stored under its
           lookup key.
    names -- either one string of comma-separated names or a sequence of
             individual names.  Empty names are ignored.
    fromsgml -- when true the lookup key is the lower-cased name; when false
                the name is used as its own key.
    """
    if hasattr(names, "split"):
        # a single comma-separated string
        names = names.split(",")
    for name in names:
        if not name:
            # produced by "", a doubled comma or a trailing comma
            continue
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name
def _add_names(current, extra):
    """Append the comma-separated names *extra* to the list *current*."""
    if current:
        return "%s,%s" % (current, extra)
    return extra


def main():
    """Command-line entry point: esis2sgml.py [options] [infile [outfile]].

    Options:
        -d, --declare         write an XML declaration before XML output
        -e                    record empty element names (sets LIST_EMPTIES)
        -s, --sgml            write SGML
        -x, --xml             write XML (the default)
        -a, --autoclose=LIST  comma-separated elements whose end tags are
                              omitted from SGML output
        --elements-map=LIST   output spellings for element names
        --attributes-map=LIST output spellings for attribute names
        --values-map=LIST     output spellings for attribute value tokens

    The map options may be repeated.  Input defaults to stdin and output to
    stdout.  A broken pipe while writing is ignored; other I/O errors
    propagate.
    """
    import getopt
    import sys

    # All module globals this function rebinds, declared in one place.
    global LIST_EMPTIES, _normalize_case, map_gi

    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    elem_names = ''
    attr_names = ''
    value_names = ''
    verbatims = ('verbatim', 'interactive-session')
    # NOTE(review): the short form "-a" is declared without ":" and so never
    # receives an argument; only --autoclose=LIST is useful.  Left as is to
    # keep the existing command line unchanged.
    opts, args = getopt.getopt(sys.argv[1:], "adesx",
                               ["autoclose=", "declare", "sgml", "xml",
                                "elements-map=", "attributes-map=",
                                "values-map="])
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
        elif opt == "--elements-map":
            elem_names = _add_names(elem_names, arg)
        elif opt == "--attributes-map":
            attr_names = _add_names(attr_names, arg)
        elif opt == "--values-map":
            value_names = _add_names(value_names, arg)

    # open input streams:

    if len(args) == 0:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(args) == 1:
        ifp = open(args[0])
        ofp = sys.stdout
    elif len(args) == 2:
        ifp = open(args[0])
        ofp = open(args[1], "w")
    else:
        sys.stderr.write("usage: %s [options] [infile [outfile]]\n"
                         % os.path.basename(sys.argv[0]))
        sys.exit(2)

    # setup the name maps:

    if elem_names or attr_names or value_names:
        # assume the origin was SGML; ignore case of the names from the ESIS
        # stream but set up conversion tables to get the case right on output
        _normalize_case = str.lower
        for gi_map, names in ((_elem_map, elem_names),
                              (_attr_map, attr_names),
                              (_token_map, value_names)):
            if names:
                # update_gi_map() splits the comma-separated string itself
                update_gi_map(gi_map, names)
    else:
        map_gi = null_map_gi

    # run the conversion:

    try:
        if xml and xmldecl:
            ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
        convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
    except IOError:
        # sys.exc_info() is used instead of "except ..., name" or
        # "except ... as name" so this parses on every Python version.
        err = sys.exc_info()[1]
        if err.errno != errno.EPIPE:
            raise


if __name__ == "__main__":
    main()