3 """Generate ESIS events based on a LaTeX source document and
6 The conversion is not strong enough to work with arbitrary LaTeX
7 documents; it has only been designed to work with the highly stylized
8 markup used in the standard Python documentation. A lot of
9 information about specific markup is encoded in the control table
10 passed to the convert() function; changing this table can allow this
11 tool to support additional LaTeX markups.
13 The format of the table is largely undocumented. The table is read by
14 load_table() from the XML file 'conversion.xml', which the script opens
15 from its own directory (sys.path[0]); no option selects an alternate file.
17 __version__
= '$Revision$'
29 from esistools
import encode
30 from types
import ListType
, StringType
, TupleType
33 from xml
.parsers
.xmllib
import XMLParser
35 from xmllib
import XMLParser
41 class LaTeXFormatError(Exception):
45 class LaTeXStackError(LaTeXFormatError
):
46 def __init__(self
, found
, stack
):
47 msg
= "environment close for %s doesn't match;\n stack = %s" \
51 LaTeXFormatError
.__init
__(self
, msg
)
# Patterns used to tokenize the LaTeX input.  Literal brackets, braces and
# backslashes are written as backslash escapes instead of one-character
# sets such as "[[]": the escapes match exactly the same text, read more
# plainly, and avoid the "possible nested set" FutureWarning that newer
# versions of the re module raise for a set starting with "[".

# \begin{NAME} and \end{NAME}; group 1 is NAME.
_begin_env_rx = re.compile(r"\\begin\{([^}]*)\}")
_end_env_rx = re.compile(r"\\end\{([^}]*)\}")
# \NAME with an optional trailing "*"; group 2 is either the opening brace
# or the whitespace that follows the macro name.
_begin_macro_rx = re.compile(r"\\([a-zA-Z]+[*]?) ?(\{|\s*\n?)")
# A "%" comment up to the end of the line, plus the indentation that
# starts the next line; group 1 is the comment text.
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
# A run of ordinary text: anything except "]", "%", "\", "{" and "}".
_text_rx = re.compile(r"[^\]%\\{}]+")
# A bracketed [optional] argument; group 1 is its content.
_optional_rx = re.compile(r"\s*\[([^\]]*)\]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile(r"[ \n]*\{(([^{}]|\{[^}]*\})*)\}")
# A whole string made of a letter followed by letters, digits, "." or "-".
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# The opening "{" or "[" of an argument, after optional spaces/newlines.
_start_group_rx = re.compile(r"[ \n]*\{")
_start_optional_rx = re.compile(r"[ \n]*\[")

# Characters that are written out as literal text when they directly
# follow a backslash in the input.
ESCAPED_CHARS = "$%#^ {}&~"
73 sys
.stderr
.write(msg
+ "\n")
def pushing(name, point, depth):
    """Report through dbgmsg() that element `name` is pushed at `point`.

    `depth` is accepted but not used in the message.
    """
    message = "pushing <%s> at %s" % (name, point)
    dbgmsg(message)
def popping(name, point, depth):
    """Report through dbgmsg() that element `name` is popped at `point`.

    `depth` is accepted but not used in the message.
    """
    message = "popping </%s> at %s" % (name, point)
    dbgmsg(message)
82 class _Stack(UserList
.UserList
):
83 def append(self
, entry
):
84 if type(entry
) is not StringType
:
85 raise LaTeXFormatError("cannot push non-string on stack: "
87 sys
.stderr
.write("%s<%s>\n" % (" "*len(self
.data
), entry
))
88 self
.data
.append(entry
)
90 def pop(self
, index
=-1):
91 entry
= self
.data
[index
]
93 sys
.stderr
.write("%s</%s>\n" % (" "*len(self
.data
), entry
))
95 def __delitem__(self
, index
):
96 entry
= self
.data
[index
]
98 sys
.stderr
.write("%s</%s>\n" % (" "*len(self
.data
), entry
))
108 def __init__(self
, ifp
, ofp
, table
):
109 self
.write
= ofp
.write
112 self
.line
= string
.join(map(string
.rstrip
, ifp
.readlines()), "\n")
115 def err_write(self
, msg
):
117 sys
.stderr
.write(str(msg
) + "\n")
122 def subconvert(self
, endchar
=None, depth
=0):
124 # Parses content, including sub-structures, until the character
125 # 'endchar' is found (with no open structures), or until the end
126 # of the input data if endchar is None.
131 if line
[0] == endchar
and not stack
:
134 m
= _comment_rx
.match(line
)
138 self
.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
140 line
= line
[m
.end():]
142 m
= _begin_env_rx
.match(line
)
145 entry
= self
.get_env_entry(name
)
146 # re-write to use the macro handler
147 line
= r
"\%s %s" % (name
, line
[m
.end():])
149 m
= _end_env_rx
.match(line
)
153 entry
= self
.get_entry(envname
)
154 while stack
and envname
!= stack
[-1] \
155 and stack
[-1] in entry
.endcloses
:
156 self
.write(")%s\n" % stack
.pop())
157 if stack
and envname
== stack
[-1]:
158 self
.write(")%s\n" % entry
.outputname
)
161 raise LaTeXStackError(envname
, stack
)
162 line
= line
[m
.end():]
164 m
= _begin_macro_rx
.match(line
)
167 macroname
= m
.group(1)
168 entry
= self
.get_entry(macroname
)
171 pos
= string
.find(line
, "\\end{%s}" % macroname
)
172 text
= line
[m
.end(1):pos
]
173 stack
.append(entry
.name
)
174 self
.write("(%s\n" % entry
.outputname
)
175 self
.write("-%s\n" % encode(text
))
176 self
.write(")%s\n" % entry
.outputname
)
178 line
= line
[pos
+ len("\\end{%s}" % macroname
):]
180 while stack
and stack
[-1] in entry
.closes
:
182 topentry
= self
.get_entry(top
)
183 if topentry
.outputname
:
184 self
.write(")%s\n-\\n\n" % topentry
.outputname
)
190 params
, optional
, empty
, environ
= self
.start_macro(macroname
)
191 # rip off the macroname
193 line
= line
[m
.end(1):]
195 line
= line
[m
.end(1):]
197 line
= line
[m
.end():]
201 # handle attribute mappings here:
202 for pentry
in params
:
203 if pentry
.type == "attribute":
205 m
= _optional_rx
.match(line
)
206 if m
and entry
.outputname
:
207 line
= line
[m
.end():]
208 self
.dump_attr(pentry
, m
.group(1))
209 elif pentry
.text
and entry
.outputname
:
210 # value supplied by conversion spec:
211 self
.dump_attr(pentry
, pentry
.text
)
213 m
= _parameter_rx
.match(line
)
215 raise LaTeXFormatError(
216 "could not extract parameter %s for %s: %s"
217 % (pentry
.name
, macroname
, `line
[:100]`
))
219 self
.dump_attr(pentry
, m
.group(1))
220 line
= line
[m
.end():]
221 elif pentry
.type == "child":
223 m
= _optional_rx
.match(line
)
225 line
= line
[m
.end():]
226 if entry
.outputname
and not opened
:
228 self
.write("(%s\n" % entry
.outputname
)
229 stack
.append(macroname
)
230 stack
.append(pentry
.name
)
231 self
.write("(%s\n" % pentry
.name
)
232 self
.write("-%s\n" % encode(m
.group(1)))
233 self
.write(")%s\n" % pentry
.name
)
236 if entry
.outputname
and not opened
:
238 self
.write("(%s\n" % entry
.outputname
)
239 stack
.append(entry
.name
)
240 self
.write("(%s\n" % pentry
.name
)
241 stack
.append(pentry
.name
)
242 self
.line
= skip_white(line
)[1:]
243 line
= self
.subconvert(
244 "}", len(stack
) + depth
+ 1)[1:]
245 self
.write(")%s\n" % stack
.pop())
246 elif pentry
.type == "content":
250 if entry
.outputname
and not opened
:
252 self
.write("(%s\n" % entry
.outputname
)
253 stack
.append(entry
.name
)
254 line
= skip_white(line
)
256 raise LaTeXFormatError(
257 "missing content for " + macroname
)
259 line
= self
.subconvert("}", len(stack
) + depth
+ 1)
260 if line
and line
[0] == "}":
262 elif pentry
.type == "text" and pentry
.text
:
263 if entry
.outputname
and not opened
:
265 stack
.append(entry
.name
)
266 self
.write("(%s\n" % entry
.outputname
)
267 self
.err_write("--- text: %s\n" % `pentry
.text`
)
268 self
.write("-%s\n" % encode(pentry
.text
))
269 elif pentry
.type == "entityref":
270 self
.write("&%s\n" % pentry
.name
)
273 self
.write("(%s\n" % entry
.outputname
)
274 stack
.append(entry
.name
)
275 if not implied_content
:
276 self
.write(")%s\n" % entry
.outputname
)
279 if line
[0] == endchar
and not stack
:
283 # end of macro or group
284 macroname
= stack
[-1]
286 conversion
= self
.table
.get(macroname
)
287 if conversion
.outputname
:
288 # otherwise, it was just a bare group
289 self
.write(")%s\n" % conversion
.outputname
)
297 if line
[0] == "\\" and line
[1] in ESCAPED_CHARS
:
298 self
.write("-%s\n" % encode(line
[1]))
301 if line
[:2] == r
"\\":
302 self
.write("(BREAK\n)BREAK\n")
305 m
= _text_rx
.match(line
)
307 text
= encode(m
.group())
308 self
.write("-%s\n" % text
)
309 line
= line
[m
.end():]
311 # special case because of \item[]
312 # XXX can we axe this???
317 # avoid infinite loops
321 raise LaTeXFormatError("could not identify markup: %s%s"
322 % (`line
[:100]`
, extra
))
324 entry
= self
.get_entry(stack
[-1])
326 self
.write(")%s\n-%s\n" % (entry
.outputname
, encode("\n")))
331 raise LaTeXFormatError("elements remain on stack: "
332 + string
.join(stack
, ", "))
333 # otherwise we just ran out of input here...
def start_macro(self, name):
    """Look up the table entry for `name` and summarize it for the caller.

    Returns a 4-tuple: the entry's parameter list, the `optional` flag of
    its first parameter, the entry's `empty` flag and its `environment`
    flag.  When the parameter list is empty, that empty list itself is
    returned in the second position.
    """
    entry = self.get_entry(name)
    params = entry.parameters
    # The short-circuit `and` yields `params` unchanged when it is empty.
    first_optional = params and params[0].optional
    return (params, first_optional, entry.empty, entry.environment)
341 def get_entry(self
, name
):
342 entry
= self
.table
.get(name
)
344 self
.err_write("get_entry(%s) failing; building default entry!"
346 # not defined; build a default entry:
347 entry
= TableEntry(name
)
348 entry
.has_content
= 1
349 entry
.parameters
.append(Parameter("content"))
350 self
.table
[name
] = entry
353 def get_env_entry(self
, name
):
354 entry
= self
.table
.get(name
)
356 # not defined; build a default entry:
357 entry
= TableEntry(name
, 1)
358 entry
.has_content
= 1
359 entry
.parameters
.append(Parameter("content"))
360 entry
.parameters
[-1].implied
= 1
361 self
.table
[name
] = entry
362 elif not entry
.environment
:
363 raise LaTeXFormatError(
364 name
+ " is defined as a macro; expected environment")
367 def dump_attr(self
, pentry
, value
):
368 if not (pentry
.name
and value
):
370 if _token_rx
.match(value
):
374 self
.write("A%s %s %s\n" % (pentry
.name
, dtype
, encode(value
)))
377 def convert(ifp
, ofp
, table
):
378 c
= Conversion(ifp
, ofp
, table
)
381 except IOError, (err
, msg
):
382 if err
!= errno
.EPIPE
:
386 def skip_white(line
):
387 while line
and line
[0] in " %\n\t\r":
388 line
= string
.lstrip(line
[1:])
394 def __init__(self
, name
, environment
=0):
396 self
.outputname
= name
397 self
.environment
= environment
398 self
.empty
= not environment
407 def __init__(self
, type, name
=None, optional
=0):
410 self
.optional
= optional
415 class TableParser(XMLParser
):
416 def __init__(self
, table
=None):
420 self
.__current
= None
422 XMLParser
.__init
__(self
)
425 for entry
in self
.__table
.values():
426 if entry
.environment
and not entry
.has_content
:
427 p
= Parameter("content")
429 entry
.parameters
.append(p
)
430 entry
.has_content
= 1
433 def start_environment(self
, attrs
):
435 self
.__current
= TableEntry(name
, environment
=1)
436 self
.__current
.verbatim
= attrs
.get("verbatim") == "yes"
437 if attrs
.has_key("outputname"):
438 self
.__current
.outputname
= attrs
.get("outputname")
439 self
.__current
.endcloses
= string
.split(attrs
.get("endcloses", ""))
440 def end_environment(self
):
443 def start_macro(self
, attrs
):
445 self
.__current
= TableEntry(name
)
446 self
.__current
.closes
= string
.split(attrs
.get("closes", ""))
447 if attrs
.has_key("outputname"):
448 self
.__current
.outputname
= attrs
.get("outputname")
450 self
.__table
[self
.__current
.name
] = self
.__current
451 self
.__current
= None
453 def start_attribute(self
, attrs
):
454 name
= attrs
.get("name")
455 optional
= attrs
.get("optional") == "yes"
457 p
= Parameter("attribute", name
, optional
=optional
)
459 p
= Parameter("attribute", optional
=optional
)
460 self
.__current
.parameters
.append(p
)
def end_attribute(self):
    """Store the buffered text as the `text` of the newest parameter.

    The value of self.__buffer is assigned to the last entry in the
    current table entry's parameter list.
    """
    collected = self.__buffer
    self.__current.parameters[-1].text = collected
465 def start_entityref(self
, attrs
):
467 p
= Parameter("entityref", name
)
468 self
.__current
.parameters
.append(p
)
470 def start_child(self
, attrs
):
472 p
= Parameter("child", name
, attrs
.get("optional") == "yes")
473 self
.__current
.parameters
.append(p
)
474 self
.__current
.empty
= 0
476 def start_content(self
, attrs
):
477 p
= Parameter("content")
478 p
.implied
= attrs
.get("implied") == "yes"
479 if self
.__current
.environment
:
481 self
.__current
.parameters
.append(p
)
482 self
.__current
.has_content
= 1
483 self
.__current
.empty
= 0
485 def start_text(self
, attrs
):
486 self
.__current
.empty
= 0
489 p
= Parameter("text")
490 p
.text
= self
.__buffer
491 self
.__current
.parameters
.append(p
)
def handle_data(self, data):
    """Append `data` to the text accumulated in self.__buffer.

    NOTE(review): presumably called by the XMLParser base class for
    character data -- confirm against the parser documentation.
    """
    self.__buffer += data
497 def load_table(fp
, table
=None):
498 parser
= TableParser(table
=table
)
499 parser
.feed(fp
.read())
501 return parser
.get_table()
507 opts
, args
= getopt
.getopt(sys
.argv
[1:], "D", ["debug"])
508 for opt
, arg
in opts
:
509 if opt
in ("-D", "--debug"):
519 ofp
= open(args
[1], "w")
524 table
= load_table(open(os
.path
.join(sys
.path
[0], 'conversion.xml')))
525 convert(ifp
, ofp
, table
)
528 if __name__
== "__main__":