3 """Generate ESIS events based on a LaTeX source document and
6 The conversion is not strong enough to work with arbitrary LaTeX
7 documents; it has only been designed to work with the highly stylized
8 markup used in the standard Python documentation. A lot of
9 information about specific markup is encoded in the control table
10 passed to the convert() function; changing this table can allow this
11 tool to support additional LaTeX markups.
13 The format of the table is largely undocumented; see the commented
14 headers where the table is specified in main(). There is no provision
15 to load an alternate table from an external file.
25 import xml
.sax
.saxutils
27 from types
import ListType
, StringType
, TupleType
30 from xml
.parsers
.xmllib
import XMLParser
32 from xmllib
import XMLParser
38 class LaTeXFormatError(Exception):
42 class LaTeXStackError(LaTeXFormatError
):
43 def __init__(self
, found
, stack
):
44 msg
= "environment close for %s doesn't match;\n stack = %s" \
48 LaTeXFormatError
.__init
__(self
, msg
)
51 s
= xml
.sax
.saxutils
.escape(s
)
52 return s
.replace("\n", "\\n\n-")
# Patterns used by the converter to recognise LaTeX constructs.

# "\begin{NAME}"; group 1 is NAME (everything up to the first "}").
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
# "\end{NAME}"; group 1 is NAME (everything up to the first "}").
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")
# A backslash followed by a macro name made of letters with an optional
# trailing "*" (group 1), an optional single space, and then either an
# opening "{" or optional whitespace / newline (group 2).
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
# One or more "%", an optional space, the rest of the line (group 1), the
# newline, and any spaces or tabs that start the following line.
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
# A run of characters that are none of "]", "~", "%", backslash, "{", "}".
_text_rx = re.compile(r"[^]~%\\{}]+")
# Optional whitespace followed by "[...]"; group 1 is the bracketed text,
# which cannot itself contain "]".
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
# Group 1 is the text between the outer braces; only one level of nested
# braces is accepted.
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
# A letter followed by letters, digits, "." or "-" up to the end of the
# string.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
# Optional spaces / newlines followed by an opening "{".
_start_group_rx = re.compile("[ \n]*{")
# Optional spaces / newlines followed by an opening "[".
_start_optional_rx = re.compile("[ \n]*[[]")

# Characters that, when they follow a backslash in the input, are written
# to the output as ordinary text.
ESCAPED_CHARS = "$%#^ {}&~"
74 sys
.stderr
.write(msg
+ "\n")
def pushing(name, point, depth):
    """Report through dbgmsg() that element *name* is being pushed.

    *point* is interpolated into the message as-is; *depth* is accepted
    but not used.
    """
    message = "pushing <%s> at %s" % (name, point)
    dbgmsg(message)
def popping(name, point, depth):
    """Report through dbgmsg() that element *name* is being popped.

    *point* is interpolated into the message as-is; *depth* is accepted
    but not used.
    """
    message = "popping </%s> at %s" % (name, point)
    dbgmsg(message)
83 class _Stack(UserList
.UserList
):
84 def append(self
, entry
):
85 if type(entry
) is not StringType
:
86 raise LaTeXFormatError("cannot push non-string on stack: "
88 sys
.stderr
.write("%s<%s>\n" % (" "*len(self
.data
), entry
))
89 self
.data
.append(entry
)
91 def pop(self
, index
=-1):
92 entry
= self
.data
[index
]
94 sys
.stderr
.write("%s</%s>\n" % (" "*len(self
.data
), entry
))
96 def __delitem__(self
, index
):
97 entry
= self
.data
[index
]
99 sys
.stderr
.write("%s</%s>\n" % (" "*len(self
.data
), entry
))
109 def __init__(self
, ifp
, ofp
, table
):
110 self
.write
= ofp
.write
113 self
.line
= string
.join(map(string
.rstrip
, ifp
.readlines()), "\n")
116 def err_write(self
, msg
):
118 sys
.stderr
.write(str(msg
) + "\n")
123 def subconvert(self
, endchar
=None, depth
=0):
125 # Parses content, including sub-structures, until the character
126 # 'endchar' is found (with no open structures), or until the end
# of the input data if endchar is None.
132 if line
[0] == endchar
and not stack
:
135 m
= _comment_rx
.match(line
)
139 self
.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
141 line
= line
[m
.end():]
143 m
= _begin_env_rx
.match(line
)
146 entry
= self
.get_env_entry(name
)
147 # re-write to use the macro handler
148 line
= r
"\%s %s" % (name
, line
[m
.end():])
150 m
= _end_env_rx
.match(line
)
154 entry
= self
.get_entry(envname
)
155 while stack
and envname
!= stack
[-1] \
156 and stack
[-1] in entry
.endcloses
:
157 self
.write(")%s\n" % stack
.pop())
158 if stack
and envname
== stack
[-1]:
159 self
.write(")%s\n" % entry
.outputname
)
162 raise LaTeXStackError(envname
, stack
)
163 line
= line
[m
.end():]
165 m
= _begin_macro_rx
.match(line
)
168 macroname
= m
.group(1)
170 # Ugh! This is a combining character...
172 self
.combining_char("c", line
[endpos
])
173 line
= line
[endpos
+ 1:]
175 entry
= self
.get_entry(macroname
)
178 pos
= string
.find(line
, "\\end{%s}" % macroname
)
179 text
= line
[m
.end(1):pos
]
180 stack
.append(entry
.name
)
181 self
.write("(%s\n" % entry
.outputname
)
182 self
.write("-%s\n" % encode(text
))
183 self
.write(")%s\n" % entry
.outputname
)
185 line
= line
[pos
+ len("\\end{%s}" % macroname
):]
187 while stack
and stack
[-1] in entry
.closes
:
189 topentry
= self
.get_entry(top
)
190 if topentry
.outputname
:
191 self
.write(")%s\n-\\n\n" % topentry
.outputname
)
197 params
, optional
, empty
, environ
= self
.start_macro(macroname
)
198 # rip off the macroname
200 line
= line
[m
.end(1):]
202 line
= line
[m
.end(1):]
204 line
= line
[m
.end():]
208 # handle attribute mappings here:
209 for pentry
in params
:
210 if pentry
.type == "attribute":
212 m
= _optional_rx
.match(line
)
213 if m
and entry
.outputname
:
214 line
= line
[m
.end():]
215 self
.dump_attr(pentry
, m
.group(1))
216 elif pentry
.text
and entry
.outputname
:
217 # value supplied by conversion spec:
218 self
.dump_attr(pentry
, pentry
.text
)
220 m
= _parameter_rx
.match(line
)
222 raise LaTeXFormatError(
223 "could not extract parameter %s for %s: %s"
224 % (pentry
.name
, macroname
, `line
[:100]`
))
226 self
.dump_attr(pentry
, m
.group(1))
227 line
= line
[m
.end():]
228 elif pentry
.type == "child":
230 m
= _optional_rx
.match(line
)
232 line
= line
[m
.end():]
233 if entry
.outputname
and not opened
:
235 self
.write("(%s\n" % entry
.outputname
)
236 stack
.append(macroname
)
237 stack
.append(pentry
.name
)
238 self
.write("(%s\n" % pentry
.name
)
239 self
.write("-%s\n" % encode(m
.group(1)))
240 self
.write(")%s\n" % pentry
.name
)
243 if entry
.outputname
and not opened
:
245 self
.write("(%s\n" % entry
.outputname
)
246 stack
.append(entry
.name
)
247 self
.write("(%s\n" % pentry
.name
)
248 stack
.append(pentry
.name
)
249 self
.line
= skip_white(line
)[1:]
250 line
= self
.subconvert(
251 "}", len(stack
) + depth
+ 1)[1:]
252 self
.write(")%s\n" % stack
.pop())
253 elif pentry
.type == "content":
257 if entry
.outputname
and not opened
:
259 self
.write("(%s\n" % entry
.outputname
)
260 stack
.append(entry
.name
)
261 line
= skip_white(line
)
263 raise LaTeXFormatError(
264 "missing content for " + macroname
)
266 line
= self
.subconvert("}", len(stack
) + depth
+ 1)
267 if line
and line
[0] == "}":
269 elif pentry
.type == "text" and pentry
.text
:
270 if entry
.outputname
and not opened
:
272 stack
.append(entry
.name
)
273 self
.write("(%s\n" % entry
.outputname
)
274 self
.err_write("--- text: %s\n" % `pentry
.text`
)
275 self
.write("-%s\n" % encode(pentry
.text
))
276 elif pentry
.type == "entityref":
277 self
.write("&%s\n" % pentry
.name
)
280 self
.write("(%s\n" % entry
.outputname
)
281 stack
.append(entry
.name
)
282 if not implied_content
:
283 self
.write(")%s\n" % entry
.outputname
)
286 if line
[0] == endchar
and not stack
:
290 # end of macro or group
291 macroname
= stack
[-1]
293 conversion
= self
.table
.get(macroname
)
294 if conversion
.outputname
:
295 # otherwise, it was just a bare group
296 self
.write(")%s\n" % conversion
.outputname
)
301 # don't worry about the "tie" aspect of this command
309 if line
[0] == "\\" and line
[1] in ESCAPED_CHARS
:
310 self
.write("-%s\n" % encode(line
[1]))
313 if line
[:2] == r
"\\":
314 self
.write("(BREAK\n)BREAK\n")
317 if line
[:2] == r
"\_":
318 line
= "_" + line
[2:]
320 if line
[:2] in (r
"\'", r
'\"'):
321 # combining characters...
322 self
.combining_char(line
[1], line
[2])
325 m
= _text_rx
.match(line
)
327 text
= encode(m
.group())
328 self
.write("-%s\n" % text
)
329 line
= line
[m
.end():]
331 # special case because of \item[]
332 # XXX can we axe this???
337 # avoid infinite loops
341 raise LaTeXFormatError("could not identify markup: %s%s"
342 % (`line
[:100]`
, extra
))
344 entry
= self
.get_entry(stack
[-1])
346 self
.write(")%s\n-%s\n" % (entry
.outputname
, encode("\n")))
351 raise LaTeXFormatError("elements remain on stack: "
352 + string
.join(stack
, ", "))
353 # otherwise we just ran out of input here...
355 # This is a really limited table of combinations, but it will have
def combining_char(self, prefix, char):
    """Write the character reference for an accent/letter combination.

    The pair (prefix, char) is looked up in self._combinations and the
    resulting number is written as a "-\\%NNN;" data line.  A pair that
    is not in the table raises KeyError.
    """
    code = self._combinations[prefix, char]
    text = "-\\%%%d;\n" % code
    self.write(text)
def start_macro(self, name):
    """Describe the macro *name* as a 4-tuple.

    The table entry is obtained from self.get_entry(name).  The result
    is (parameters, optional, empty, environment), where *optional* is
    the ``optional`` flag of the first parameter, or the (empty)
    parameter list itself when the entry has no parameters.
    """
    entry = self.get_entry(name)
    params = entry.parameters
    # Deliberate short-circuit: a falsy parameter list is passed through
    # unchanged instead of being converted to a boolean.
    first_optional = params and params[0].optional
    return params, first_optional, entry.empty, entry.environment
373 def get_entry(self
, name
):
374 entry
= self
.table
.get(name
)
376 self
.err_write("get_entry(%s) failing; building default entry!"
378 # not defined; build a default entry:
379 entry
= TableEntry(name
)
380 entry
.has_content
= 1
381 entry
.parameters
.append(Parameter("content"))
382 self
.table
[name
] = entry
385 def get_env_entry(self
, name
):
386 entry
= self
.table
.get(name
)
388 # not defined; build a default entry:
389 entry
= TableEntry(name
, 1)
390 entry
.has_content
= 1
391 entry
.parameters
.append(Parameter("content"))
392 entry
.parameters
[-1].implied
= 1
393 self
.table
[name
] = entry
394 elif not entry
.environment
:
395 raise LaTeXFormatError(
396 name
+ " is defined as a macro; expected environment")
399 def dump_attr(self
, pentry
, value
):
400 if not (pentry
.name
and value
):
402 if _token_rx
.match(value
):
406 self
.write("A%s %s %s\n" % (pentry
.name
, dtype
, encode(value
)))
409 def convert(ifp
, ofp
, table
):
410 c
= Conversion(ifp
, ofp
, table
)
413 except IOError, (err
, msg
):
414 if err
!= errno
.EPIPE
:
418 def skip_white(line
):
419 while line
and line
[0] in " %\n\t\r":
420 line
= string
.lstrip(line
[1:])
426 def __init__(self
, name
, environment
=0):
428 self
.outputname
= name
429 self
.environment
= environment
430 self
.empty
= not environment
439 def __init__(self
, type, name
=None, optional
=0):
442 self
.optional
= optional
447 class TableParser(XMLParser
):
448 def __init__(self
, table
=None):
452 self
.__current
= None
454 XMLParser
.__init
__(self
)
457 for entry
in self
.__table
.values():
458 if entry
.environment
and not entry
.has_content
:
459 p
= Parameter("content")
461 entry
.parameters
.append(p
)
462 entry
.has_content
= 1
465 def start_environment(self
, attrs
):
467 self
.__current
= TableEntry(name
, environment
=1)
468 self
.__current
.verbatim
= attrs
.get("verbatim") == "yes"
469 if attrs
.has_key("outputname"):
470 self
.__current
.outputname
= attrs
.get("outputname")
471 self
.__current
.endcloses
= string
.split(attrs
.get("endcloses", ""))
472 def end_environment(self
):
475 def start_macro(self
, attrs
):
477 self
.__current
= TableEntry(name
)
478 self
.__current
.closes
= string
.split(attrs
.get("closes", ""))
479 if attrs
.has_key("outputname"):
480 self
.__current
.outputname
= attrs
.get("outputname")
482 self
.__table
[self
.__current
.name
] = self
.__current
483 self
.__current
= None
485 def start_attribute(self
, attrs
):
486 name
= attrs
.get("name")
487 optional
= attrs
.get("optional") == "yes"
489 p
= Parameter("attribute", name
, optional
=optional
)
491 p
= Parameter("attribute", optional
=optional
)
492 self
.__current
.parameters
.append(p
)
def end_attribute(self):
    """Store the buffered character data on the newest parameter.

    The text in self.__buffer becomes the ``text`` of the last parameter
    of the entry currently being built (self.__current).
    """
    latest = self.__current.parameters[-1]
    latest.text = self.__buffer
497 def start_entityref(self
, attrs
):
499 p
= Parameter("entityref", name
)
500 self
.__current
.parameters
.append(p
)
502 def start_child(self
, attrs
):
504 p
= Parameter("child", name
, attrs
.get("optional") == "yes")
505 self
.__current
.parameters
.append(p
)
506 self
.__current
.empty
= 0
508 def start_content(self
, attrs
):
509 p
= Parameter("content")
510 p
.implied
= attrs
.get("implied") == "yes"
511 if self
.__current
.environment
:
513 self
.__current
.parameters
.append(p
)
514 self
.__current
.has_content
= 1
515 self
.__current
.empty
= 0
517 def start_text(self
, attrs
):
518 self
.__current
.empty
= 0
521 p
= Parameter("text")
522 p
.text
= self
.__buffer
523 self
.__current
.parameters
.append(p
)
def handle_data(self, data):
    """Append the character data *data* to self.__buffer."""
    self.__buffer += data
529 def load_table(fp
, table
=None):
530 parser
= TableParser(table
=table
)
531 parser
.feed(fp
.read())
533 return parser
.get_table()
539 opts
, args
= getopt
.getopt(sys
.argv
[1:], "D", ["debug"])
540 for opt
, arg
in opts
:
541 if opt
in ("-D", "--debug"):
551 ofp
= open(args
[1], "w")
556 table
= load_table(open(os
.path
.join(sys
.path
[0], 'conversion.xml')))
557 convert(ifp
, ofp
, table
)
560 if __name__
== "__main__":