3 """Generate ESIS events based on a LaTeX source document and
6 The conversion is not strong enough to work with arbitrary LaTeX
7 documents; it has only been designed to work with the highly stylized
8 markup used in the standard Python documentation. A lot of
9 information about specific markup is encoded in the control table
10 passed to the convert() function; changing this table can allow this
11 tool to support additional LaTeX markups.
13 The format of the table is largely undocumented; see the commented
14 headers where the table is specified in main(). There is no provision
15 to load an alternate table from an external file.
26 import xml
.sax
.saxutils
28 from types
import ListType
, StringType
, TupleType
30 from esistools
import encode
36 class LaTeXFormatError(Exception):
40 class LaTeXStackError(LaTeXFormatError
):
41 def __init__(self
, found
, stack
):
42 msg
= "environment close for %s doesn't match;\n stack = %s" \
46 LaTeXFormatError
.__init
__(self
, msg
)
# Lexical patterns for the converter.  Where their use is visible in this
# file they are applied with .match(), i.e. anchored at the start of the
# text that remains to be processed.

# "\begin{name}"; group 1 is the environment name.
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")

# "\end{name}"; group 1 is the environment name.
_end_env_rx = re.compile(r"[\\]end{([^}]*)}")

# A backslash followed by a macro name made of letters with an optional
# trailing "*" (group 1), an optional single space, and then either an
# opening brace or a run of whitespace (group 2).
_begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")

# One or more "%", an optional space, the rest of the line (group 1), the
# newline, and any spaces/tabs that start the following line.
_comment_rx = re.compile("%+ ?(.*)\n[ \t]*")

# A run of characters none of which is "]", "~", "%", a backslash, "{"
# or "}".
_text_rx = re.compile(r"[^]~%\\{}]+")

# Optional leading whitespace and a bracketed "[...]" argument; group 1
# is the text between the brackets.
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]", re.MULTILINE)

# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}.
# Group 1 is the text between the outer braces.
_parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")

# A letter followed by letters, digits, "." or "-" up to the end of the
# string.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")

# Spaces/newlines followed by an opening brace.
_start_group_rx = re.compile("[ \n]*{")

# Spaces/newlines followed by an opening square bracket.
_start_optional_rx = re.compile("[ \n]*[[]")


# Characters that are emitted literally when they directly follow a
# backslash in the input.
ESCAPED_CHARS = "$%#^ {}&~"
68 sys
.stderr
.write(msg
+ "\n")
def pushing(name, point, depth):
    """Report through dbgmsg() that element `name` is being pushed.

    `point` is included verbatim in the message; `depth` is accepted but
    not used by this function.
    """
    message = "pushing <%s> at %s" % (name, point)
    dbgmsg(message)
def popping(name, point, depth):
    """Report through dbgmsg() that element `name` is being popped.

    `point` is included verbatim in the message; `depth` is accepted but
    not used by this function.
    """
    message = "popping </%s> at %s" % (name, point)
    dbgmsg(message)
77 class _Stack(UserList
.UserList
):
78 def append(self
, entry
):
79 if type(entry
) is not StringType
:
80 raise LaTeXFormatError("cannot push non-string on stack: "
82 #dbgmsg("%s<%s>" % (" "*len(self.data), entry))
83 self
.data
.append(entry
)
85 def pop(self
, index
=-1):
86 entry
= self
.data
[index
]
88 #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
90 def __delitem__(self
, index
):
91 entry
= self
.data
[index
]
93 #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
103 def __init__(self
, ifp
, ofp
, table
):
104 self
.write
= ofp
.write
107 L
= [s
.rstrip() for s
in ifp
.readlines()]
109 self
.line
= string
.join(L
, "\n")
115 def subconvert(self
, endchar
=None, depth
=0):
117 # Parses content, including sub-structures, until the character
118 # 'endchar' is found (with no open structures), or until the end
119 # of the input data if endchar is None.
124 if line
[0] == endchar
and not stack
:
127 m
= _comment_rx
.match(line
)
131 self
.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
133 line
= line
[m
.end():]
135 m
= _begin_env_rx
.match(line
)
138 entry
= self
.get_env_entry(name
)
139 # re-write to use the macro handler
140 line
= r
"\%s %s" % (name
, line
[m
.end():])
142 m
= _end_env_rx
.match(line
)
146 entry
= self
.get_entry(envname
)
147 while stack
and envname
!= stack
[-1] \
148 and stack
[-1] in entry
.endcloses
:
149 self
.write(")%s\n" % stack
.pop())
150 if stack
and envname
== stack
[-1]:
151 self
.write(")%s\n" % entry
.outputname
)
154 raise LaTeXStackError(envname
, stack
)
155 line
= line
[m
.end():]
157 m
= _begin_macro_rx
.match(line
)
160 macroname
= m
.group(1)
162 # Ugh! This is a combining character...
164 self
.combining_char("c", line
[endpos
])
165 line
= line
[endpos
+ 1:]
167 entry
= self
.get_entry(macroname
)
170 pos
= line
.find("\\end{%s}" % macroname
)
171 text
= line
[m
.end(1):pos
]
172 stack
.append(entry
.name
)
173 self
.write("(%s\n" % entry
.outputname
)
174 self
.write("-%s\n" % encode(text
))
175 self
.write(")%s\n" % entry
.outputname
)
177 line
= line
[pos
+ len("\\end{%s}" % macroname
):]
179 while stack
and stack
[-1] in entry
.closes
:
181 topentry
= self
.get_entry(top
)
182 if topentry
.outputname
:
183 self
.write(")%s\n-\\n\n" % topentry
.outputname
)
185 if entry
.outputname
and entry
.empty
:
188 params
, optional
, empty
= self
.start_macro(macroname
)
189 # rip off the macroname
191 line
= line
[m
.end(1):]
193 line
= line
[m
.end(1):]
195 line
= line
[m
.end():]
199 # handle attribute mappings here:
200 for pentry
in params
:
201 if pentry
.type == "attribute":
203 m
= _optional_rx
.match(line
)
204 if m
and entry
.outputname
:
205 line
= line
[m
.end():]
206 self
.dump_attr(pentry
, m
.group(1))
207 elif pentry
.text
and entry
.outputname
:
208 # value supplied by conversion spec:
209 self
.dump_attr(pentry
, pentry
.text
)
211 m
= _parameter_rx
.match(line
)
213 raise LaTeXFormatError(
214 "could not extract parameter %s for %s: %s"
215 % (pentry
.name
, macroname
, `line
[:100]`
))
217 self
.dump_attr(pentry
, m
.group(1))
218 line
= line
[m
.end():]
219 elif pentry
.type == "child":
221 m
= _optional_rx
.match(line
)
223 line
= line
[m
.end():]
224 if entry
.outputname
and not opened
:
226 self
.write("(%s\n" % entry
.outputname
)
227 stack
.append(macroname
)
228 stack
.append(pentry
.name
)
229 self
.write("(%s\n" % pentry
.name
)
230 self
.write("-%s\n" % encode(m
.group(1)))
231 self
.write(")%s\n" % pentry
.name
)
234 if entry
.outputname
and not opened
:
236 self
.write("(%s\n" % entry
.outputname
)
237 stack
.append(entry
.name
)
238 self
.write("(%s\n" % pentry
.name
)
239 stack
.append(pentry
.name
)
240 self
.line
= skip_white(line
)[1:]
241 line
= self
.subconvert(
242 "}", len(stack
) + depth
+ 1)[1:]
243 self
.write(")%s\n" % stack
.pop())
244 elif pentry
.type == "content":
248 if entry
.outputname
and not opened
:
250 self
.write("(%s\n" % entry
.outputname
)
251 stack
.append(entry
.name
)
252 line
= skip_white(line
)
254 raise LaTeXFormatError(
255 "missing content for " + macroname
)
257 line
= self
.subconvert("}", len(stack
) + depth
+ 1)
258 if line
and line
[0] == "}":
260 elif pentry
.type == "text" and pentry
.text
:
261 if entry
.outputname
and not opened
:
263 stack
.append(entry
.name
)
264 self
.write("(%s\n" % entry
.outputname
)
265 #dbgmsg("--- text: %s" % `pentry.text`)
266 self
.write("-%s\n" % encode(pentry
.text
))
267 elif pentry
.type == "entityref":
268 self
.write("&%s\n" % pentry
.name
)
271 self
.write("(%s\n" % entry
.outputname
)
272 stack
.append(entry
.name
)
273 if not implied_content
:
274 self
.write(")%s\n" % entry
.outputname
)
277 if line
[0] == endchar
and not stack
:
281 # end of macro or group
282 macroname
= stack
[-1]
284 conversion
= self
.table
[macroname
]
285 if conversion
.outputname
:
286 # otherwise, it was just a bare group
287 self
.write(")%s\n" % conversion
.outputname
)
292 # don't worry about the "tie" aspect of this command
300 if line
[0] == "\\" and line
[1] in ESCAPED_CHARS
:
301 self
.write("-%s\n" % encode(line
[1]))
304 if line
[:2] == r
"\\":
305 self
.write("(BREAK\n)BREAK\n")
308 if line
[:2] == r
"\_":
309 line
= "_" + line
[2:]
311 if line
[:2] in (r
"\'", r
'\"'):
312 # combining characters...
313 self
.combining_char(line
[1], line
[2])
316 m
= _text_rx
.match(line
)
318 text
= encode(m
.group())
319 self
.write("-%s\n" % text
)
320 line
= line
[m
.end():]
322 # special case because of \item[]
323 # XXX can we axe this???
328 # avoid infinite loops
332 raise LaTeXFormatError("could not identify markup: %s%s"
333 % (`line
[:100]`
, extra
))
335 entry
= self
.get_entry(stack
[-1])
337 self
.write(")%s\n-%s\n" % (entry
.outputname
, encode("\n")))
342 raise LaTeXFormatError("elements remain on stack: "
343 + string
.join(stack
, ", "))
344 # otherwise we just ran out of input here...
346 # This is a really limited table of combinations, but it will have
def combining_char(self, prefix, char):
    """Write the data event for an accented (combining) character.

    The (prefix, char) pair is looked up in self._combinations and the
    resulting ordinal is written as a "-\\%<ordinal>;" line.  A pair that
    is not in the table makes the lookup fail; no fallback is attempted.
    """
    code_point = self._combinations[prefix, char]
    event = "-\\%%%d;\n" % code_point
    self.write(event)
def start_macro(self, name):
    """Fetch the table entry for `name` and summarise its parameters.

    Returns a 3-tuple (parameters, optional, empty): the entry's
    parameter list, the `optional` flag of its first parameter, and the
    entry's `empty` flag.
    """
    entry = self.get_entry(name)
    param_list = entry.parameters
    # Same result as ``param_list and param_list[0].optional``: when there
    # are no parameters the (falsy) list itself is handed back as the flag.
    if param_list:
        first_optional = param_list[0].optional
    else:
        first_optional = param_list
    return param_list, first_optional, entry.empty
364 def get_entry(self
, name
):
365 entry
= self
.table
.get(name
)
367 dbgmsg("get_entry(%s) failing; building default entry!" % `name`
)
368 # not defined; build a default entry:
369 entry
= TableEntry(name
)
370 entry
.has_content
= 1
371 entry
.parameters
.append(Parameter("content"))
372 self
.table
[name
] = entry
375 def get_env_entry(self
, name
):
376 entry
= self
.table
.get(name
)
378 # not defined; build a default entry:
379 entry
= TableEntry(name
, 1)
380 entry
.has_content
= 1
381 entry
.parameters
.append(Parameter("content"))
382 entry
.parameters
[-1].implied
= 1
383 self
.table
[name
] = entry
384 elif not entry
.environment
:
385 raise LaTeXFormatError(
386 name
+ " is defined as a macro; expected environment")
389 def dump_attr(self
, pentry
, value
):
390 if not (pentry
.name
and value
):
392 if _token_rx
.match(value
):
396 self
.write("A%s %s %s\n" % (pentry
.name
, dtype
, encode(value
)))
399 def convert(ifp
, ofp
, table
):
400 c
= Conversion(ifp
, ofp
, table
)
403 except IOError, (err
, msg
):
404 if err
!= errno
.EPIPE
:
408 def skip_white(line
):
409 while line
and line
[0] in " %\n\t\r":
410 line
= line
[1:].lstrip()
416 def __init__(self
, name
, environment
=0):
418 self
.outputname
= name
419 self
.environment
= environment
420 self
.empty
= not environment
429 def __init__(self
, type, name
=None, optional
=0):
432 self
.optional
= optional
437 class TableHandler(xml
.sax
.handler
.ContentHandler
):
444 for entry
in self
.__table
.values():
445 if entry
.environment
and not entry
.has_content
:
446 p
= Parameter("content")
448 entry
.parameters
.append(p
)
449 entry
.has_content
= 1
452 def startElement(self
, tag
, attrs
):
454 start
, end
= self
.__methods
[tag
]
456 start
= getattr(self
, "start_" + tag
, None)
457 end
= getattr(self
, "end_" + tag
, None)
458 self
.__methods
[tag
] = (start
, end
)
462 def endElement(self
, tag
):
463 start
, end
= self
.__methods
[tag
]
def endDocument(self):
    """Handle the end of the document by emptying the handler cache.

    self.__methods maps a tag name to the (start, end) handler pair that
    startElement() stores for it; the pairs are discarded here.
    """
    self.__methods.clear()
def characters(self, data):
    """Append a chunk of character data to the text buffer.

    The accumulated buffer is read later by the end_* handlers (for
    example end_attribute()).
    """
    self.__buffer += data
473 def start_environment(self
, attrs
):
475 self
.__current
= TableEntry(name
, environment
=1)
476 self
.__current
.verbatim
= attrs
.get("verbatim") == "yes"
477 if attrs
.has_key("outputname"):
478 self
.__current
.outputname
= attrs
.get("outputname")
479 self
.__current
.endcloses
= attrs
.get("endcloses", "").split()
480 def end_environment(self
):
483 def start_macro(self
, attrs
):
485 self
.__current
= TableEntry(name
)
486 self
.__current
.closes
= attrs
.get("closes", "").split()
487 if attrs
.has_key("outputname"):
488 self
.__current
.outputname
= attrs
.get("outputname")
490 name
= self
.__current
.name
491 if self
.__table
.has_key(name
):
492 raise ValueError("name %s already in use" % `name`
)
493 self
.__table
[name
] = self
.__current
494 self
.__current
= None
496 def start_attribute(self
, attrs
):
497 name
= attrs
.get("name")
498 optional
= attrs
.get("optional") == "yes"
500 p
= Parameter("attribute", name
, optional
=optional
)
502 p
= Parameter("attribute", optional
=optional
)
503 self
.__current
.parameters
.append(p
)
def end_attribute(self):
    """Store the buffered character data as the attribute's text.

    The text is set on the last parameter of the current entry, which is
    the one start_attribute() appended.
    """
    self.__current.parameters[-1].text = self.__buffer
508 def start_entityref(self
, attrs
):
510 p
= Parameter("entityref", name
)
511 self
.__current
.parameters
.append(p
)
513 def start_child(self
, attrs
):
515 p
= Parameter("child", name
, attrs
.get("optional") == "yes")
516 self
.__current
.parameters
.append(p
)
517 self
.__current
.empty
= 0
519 def start_content(self
, attrs
):
520 p
= Parameter("content")
521 p
.implied
= attrs
.get("implied") == "yes"
522 if self
.__current
.environment
:
524 self
.__current
.parameters
.append(p
)
525 self
.__current
.has_content
= 1
526 self
.__current
.empty
= 0
528 def start_text(self
, attrs
):
529 self
.__current
.empty
= 0
532 p
= Parameter("text")
533 p
.text
= self
.__buffer
534 self
.__current
.parameters
.append(p
)
539 xml
.sax
.parse(fp
, ch
)
540 return ch
.get_table()
546 opts
, args
= getopt
.getopt(sys
.argv
[1:], "D", ["debug"])
547 for opt
, arg
in opts
:
548 if opt
in ("-D", "--debug"):
558 ofp
= open(args
[1], "w")
563 table
= load_table(open(os
.path
.join(sys
.path
[0], 'conversion.xml')))
564 convert(ifp
, ofp
, table
)
567 if __name__
== "__main__":