3 """Convert ESIS events to SGML or XML markup.
5 This is limited, but seems sufficient for the ESIS generated by the
6 latex2esis.py script when run over the Python documentation.
9 # This should have an explicit option to indicate whether the *INPUT* was
10 # generated from an SGML or an XML application.
18 from xml
.sax
.saxutils
import escape
23 EMPTIES_FILENAME
= "../sgml/empties.dat"
33 def map_gi(sgmlgi
, map):
34 uncased
= _normalize_case(sgmlgi
)
41 def null_map_gi(sgmlgi
, map):
45 def format_attrs(attrs
, xml
=0):
50 for name
, value
in attrs
:
52 append('%s="%s"' % (name
, escape(value
)))
54 # this is a little bogus, but should do for now
55 if name
== value
and isnmtoken(value
):
58 if value
== "no" + name
:
61 append("%s=%s" % (name
, value
))
63 append('%s="%s"' % (name
, escape(value
)))
66 return string
.join(parts
)
69 _nmtoken_rx
= re
.compile("[a-z][-._a-z0-9]*$", re
.IGNORECASE
)
71 return _nmtoken_rx
.match(s
) is not None
73 _token_rx
= re
.compile("[a-z0-9][-._a-z0-9]*$", re
.IGNORECASE
)
75 return _token_rx
.match(s
) is not None
78 def convert(ifp
, ofp
, xml
=0, autoclose
=(), verbatims
=()):
94 if data
and data
[-1] == "\n":
97 data
= esistools
.decode(data
)
100 data
= string
.replace(data
, "---", "—")
107 if data
== "COMMENT":
110 data
= map_gi(data
, _elem_map
)
111 if knownempty
and xml
:
112 ofp
.write("<%s%s/>" % (data
, format_attrs(attrs
, xml
)))
114 ofp
.write("<%s%s>" % (data
, format_attrs(attrs
, xml
)))
115 if knownempty
and data
not in knownempties
:
116 # accumulate knowledge!
117 knownempties
.append(data
)
120 lastempty
= knownempty
122 inverbatim
= data
in verbatims
124 if data
== "COMMENT":
127 data
= map_gi(data
, _elem_map
)
130 ofp
.write("</%s>" % data
)
131 elif data
not in knownempties
:
132 if data
in autoclose
:
134 elif lastopened
== data
:
137 ofp
.write("</%s>" % data
)
142 name
, type, value
= string
.split(data
, " ", 2)
143 name
= map_gi(name
, _attr_map
)
144 attrs
[name
] = esistools
.decode(value
)
148 ofp
.write("&%s;" % data
)
151 raise RuntimeError, "unrecognized ESIS event type: '%s'" % type
154 dump_empty_element_names(knownempties
)
157 def dump_empty_element_names(knownempties
):
159 for gi
in knownempties
:
161 knownempties
.append("")
162 if os
.path
.isfile(EMPTIES_FILENAME
):
163 fp
= open(EMPTIES_FILENAME
)
168 gi
= string
.strip(line
)
171 fp
= open(EMPTIES_FILENAME
, "w")
174 fp
.write(string
.join(gilist
, "\n"))
179 def update_gi_map(map, names
, fromsgml
=1):
180 for name
in string
.split(names
, ","):
182 uncased
= string
.lower(name
)
192 autoclose
= AUTOCLOSE
198 verbatims
= ('verbatim', 'interactive-session')
199 opts
, args
= getopt
.getopt(sys
.argv
[1:], "adesx",
200 ["autoclose=", "declare", "sgml", "xml",
201 "elements-map=", "attributes-map",
203 for opt
, arg
in opts
:
204 if opt
in ("-d", "--declare"):
209 elif opt
in ("-s", "--sgml"):
211 elif opt
in ("-x", "--xml"):
213 elif opt
in ("-a", "--autoclose"):
214 autoclose
= string
.split(arg
, ",")
215 elif opt
== "--elements-map":
216 elem_names
= ("%s,%s" % (elem_names
, arg
))[1:]
217 elif opt
== "--attributes-map":
218 attr_names
= ("%s,%s" % (attr_names
, arg
))[1:]
219 elif opt
== "--values-map":
220 value_names
= ("%s,%s" % (value_names
, arg
))[1:]
222 # open input streams:
232 ofp
= open(args
[1], "w")
237 # setup the name maps:
239 if elem_names
or attr_names
or value_names
:
240 # assume the origin was SGML; ignore case of the names from the ESIS
241 # stream but set up conversion tables to get the case right on output
242 global _normalize_case
243 _normalize_case
= string
.lower
244 update_gi_map(_elem_map
, string
.split(elem_names
, ","))
245 update_gi_map(_attr_map
, string
.split(attr_names
, ","))
246 update_gi_map(_values_map
, string
.split(value_names
, ","))
251 # run the conversion:
255 opf
.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
256 convert(ifp
, ofp
, xml
=xml
, autoclose
=autoclose
, verbatims
=verbatims
)
257 except IOError, (err
, msg
):
258 if err
!= errno
.EPIPE
:
262 if __name__
== "__main__":