3 """Convert ESIS events to SGML or XML markup.
5 This is limited, but seems sufficient for the ESIS generated by the
6 latex2esis.py script when run over the Python documentation.
9 # This should have an explicit option to indicate whether the *INPUT* was
10 # generated from an SGML or an XML application.
12 __version__
= '$Revision$'
20 from xml
.utils
import escape
25 EMPTIES_FILENAME
= "../sgml/empties.dat"
35 def map_gi(sgmlgi
, map):
36 uncased
= _normalize_case(sgmlgi
)
43 def null_map_gi(sgmlgi
, map):
47 def format_attrs(attrs
, xml
=0):
52 for name
, value
in attrs
:
54 append('%s="%s"' % (name
, escape(value
)))
56 # this is a little bogus, but should do for now
57 if name
== value
and isnmtoken(value
):
60 if value
== "no" + name
:
63 append("%s=%s" % (name
, value
))
65 append('%s="%s"' % (name
, escape(value
)))
68 return string
.join(parts
)
71 _nmtoken_rx
= re
.compile("[a-z][-._a-z0-9]*$", re
.IGNORECASE
)
73 return _nmtoken_rx
.match(s
) is not None
75 _token_rx
= re
.compile("[a-z0-9][-._a-z0-9]*$", re
.IGNORECASE
)
77 return _token_rx
.match(s
) is not None
80 def convert(ifp
, ofp
, xml
=0, autoclose
=(), verbatims
=()):
96 if data
and data
[-1] == "\n":
99 data
= esistools
.decode(data
)
102 data
= string
.replace(data
, "---", "—")
109 if data
== "COMMENT":
112 data
= map_gi(data
, _elem_map
)
113 if knownempty
and xml
:
114 ofp
.write("<%s%s/>" % (data
, format_attrs(attrs
, xml
)))
116 ofp
.write("<%s%s>" % (data
, format_attrs(attrs
, xml
)))
117 if knownempty
and data
not in knownempties
:
118 # accumulate knowledge!
119 knownempties
.append(data
)
122 lastempty
= knownempty
124 inverbatim
= data
in verbatims
126 if data
== "COMMENT":
129 data
= map_gi(data
, _elem_map
)
132 ofp
.write("</%s>" % data
)
133 elif data
not in knownempties
:
134 if data
in autoclose
:
136 elif lastopened
== data
:
139 ofp
.write("</%s>" % data
)
144 name
, type, value
= string
.split(data
, " ", 2)
145 name
= map_gi(name
, _attr_map
)
146 attrs
[name
] = esistools
.decode(value
)
150 ofp
.write("&%s;" % data
)
153 raise RuntimeError, "unrecognized ESIS event type: '%s'" % type
156 dump_empty_element_names(knownempties
)
159 def dump_empty_element_names(knownempties
):
161 for gi
in knownempties
:
163 knownempties
.append("")
164 if os
.path
.isfile(EMPTIES_FILENAME
):
165 fp
= open(EMPTIES_FILENAME
)
170 gi
= string
.strip(line
)
173 fp
= open(EMPTIES_FILENAME
, "w")
176 fp
.write(string
.join(gilist
, "\n"))
181 def update_gi_map(map, names
, fromsgml
=1):
182 for name
in string
.split(names
, ","):
184 uncased
= string
.lower(name
)
194 autoclose
= AUTOCLOSE
200 verbatims
= ('verbatim', 'interactive-session')
201 opts
, args
= getopt
.getopt(sys
.argv
[1:], "adesx",
202 ["autoclose=", "declare", "sgml", "xml",
203 "elements-map=", "attributes-map",
205 for opt
, arg
in opts
:
206 if opt
in ("-d", "--declare"):
211 elif opt
in ("-s", "--sgml"):
213 elif opt
in ("-x", "--xml"):
215 elif opt
in ("-a", "--autoclose"):
216 autoclose
= string
.split(arg
, ",")
217 elif opt
== "--elements-map":
218 elem_names
= ("%s,%s" % (elem_names
, arg
))[1:]
219 elif opt
== "--attributes-map":
220 attr_names
= ("%s,%s" % (attr_names
, arg
))[1:]
221 elif opt
== "--values-map":
222 value_names
= ("%s,%s" % (value_names
, arg
))[1:]
224 # open input streams:
234 ofp
= open(args
[1], "w")
239 # setup the name maps:
241 if elem_names
or attr_names
or value_names
:
242 # assume the origin was SGML; ignore case of the names from the ESIS
243 # stream but set up conversion tables to get the case right on output
244 global _normalize_case
245 _normalize_case
= string
.lower
246 update_gi_map(_elem_map
, string
.split(elem_names
, ","))
247 update_gi_map(_attr_map
, string
.split(attr_names
, ","))
248 update_gi_map(_values_map
, string
.split(value_names
, ","))
253 # run the conversion:
257 opf
.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
258 convert(ifp
, ofp
, xml
=xml
, autoclose
=autoclose
, verbatims
=verbatims
)
259 except IOError, (err
, msg
):
260 if err
!= errno
.EPIPE
:
264 if __name__
== "__main__":