3 """Generate ESIS events based on a LaTeX source document and configuration
6 __version__
= '$Revision$'
14 from esistools
import encode
20 class Error(Exception):
23 class LaTeXFormatError(Error
):
# Pre-compiled patterns used by the converter.  Each one is applied with
# .match() at the start of the remaining input.  All patterns are written
# as raw strings so that every backslash seen here is a regex backslash.

# \begin{name} and \end{name}; group 1 is the environment name.
_begin_env_rx = re.compile(r"\\begin{([^}]*)}")
_end_env_rx = re.compile(r"\\end{([^}]*)}")

# \macroname or \macroname*; group 1 is the name (including any "*"),
# group 2 is either the opening "{" or the whitespace following the name.
_begin_macro_rx = re.compile(r"\\([a-zA-Z]+[*]?)({|\s*\n?)")

# A "%" comment through end of line, plus the next line's leading blanks;
# group 1 is the comment text.
_comment_rx = re.compile(r"%+ ?(.*)\n *")

# A run of ordinary text: anything up to the next "]", "%", "\", "{" or "}".
_text_rx = re.compile(r"[^]%\\{}]+")

# An optional [...] argument; group 1 is the text between the brackets.
# The "[" is escaped rather than written "[[]", which newer versions of
# the re module report as a possible nested set.
_optional_rx = re.compile(r"\s*\[([^]]*)\]")

# _parameter_rx is this complicated to allow {...} inside a parameter;
# this is useful to match tabular layout specifications like {c|p{24pt}}
_parameter_rx = re.compile(r"[ \n]*{(([^{}]|{[^}]*})*)}")

# A value consisting solely of a letter followed by letters, digits,
# "." or "-".
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")

# The opening delimiter of a {group} or an [optional] argument, allowing
# leading spaces and newlines.
_start_group_rx = re.compile(r"[ \n]*{")
_start_optional_rx = re.compile(r"[ \n]*\[")

# Characters that are tested for after a backslash (e.g. "\%", "\{").
ESCAPED_CHARS = "$%#^ {}&~"
44 def pushing(name
, point
, depth
):
46 sys
.stderr
.write("%s<%s> at %s\n" % (" "*depth
, name
, point
))
48 def popping(name
, point
, depth
):
50 sys
.stderr
.write("%s</%s> at %s\n" % (" "*depth
, name
, point
))
53 def subconvert(line
, ofp
, table
, discards
, autoclosing
, endchar
=None, depth
=0):
55 sys
.stderr
.write("subconvert(%s, ..., endchar=%s)\n"
56 % (`line
[:20]`
, `endchar`
))
59 if line
[0] == endchar
and not stack
:
61 sys
.stderr
.write("subconvert() --> %s\n" % `line
[1:21]`
)
63 m
= _comment_rx
.match(line
)
67 ofp
.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" % encode(text
))
70 m
= _begin_env_rx
.match(line
)
72 # re-write to use the macro handler
73 line
= r
"\%s %s" % (m
.group(1), line
[m
.end():])
75 m
= _end_env_rx
.match(line
)
79 if envname
== "document":
82 if n
not in autoclosing
:
83 raise LaTeXFormatError("open element on stack: " + `n`
)
84 # should be more careful, but this is easier to code:
86 ofp
.write(")document\n")
87 elif envname
== stack
[-1]:
88 ofp
.write(")%s\n" % envname
)
90 popping(envname
, "a", len(stack
) + depth
)
92 sys
.stderr
.write("stack: %s\n" % `stack`
)
93 raise LaTeXFormatError(
94 "environment close for %s doesn't match" % envname
)
97 m
= _begin_macro_rx
.match(line
)
100 macroname
= m
.group(1)
101 if macroname
== "verbatim":
103 pos
= string
.find(line
, "\\end{verbatim}")
104 text
= line
[m
.end(1):pos
]
105 ofp
.write("(verbatim\n")
106 ofp
.write("-%s\n" % encode(text
))
107 ofp
.write(")verbatim\n")
108 line
= line
[pos
+ len("\\end{verbatim}"):]
111 if macroname
[-1] == "*":
112 macroname
= macroname
[:-1]
114 if macroname
in autoclosing
and macroname
in stack
:
115 while stack
[-1] != macroname
:
116 if stack
[-1] and stack
[-1] not in discards
:
117 ofp
.write(")%s\n-\\n\n" % stack
[-1])
118 popping(stack
[-1], "b", len(stack
) + depth
- 1)
120 if macroname
not in discards
:
121 ofp
.write("-\\n\n)%s\n-\\n\n" % macroname
)
122 popping(macroname
, "c", len(stack
) + depth
- 1)
125 if macroname
in discards
:
126 ofp
= StringIO
.StringIO()
128 conversion
= table
.get(macroname
, ([], 0, 0, 0, 0))
129 params
, optional
, empty
, environ
, nocontent
= conversion
135 ofp
.write("Anumbered TOKEN no\n")
137 # rip off the macroname
139 if optional
and len(params
) == 1:
140 line
= line
= line
[m
.end():]
142 line
= line
[m
.end(1):]
144 line
= line
[m
.end(1):]
146 line
= line
[m
.end():]
148 # Very ugly special case to deal with \item[]. The catch is that
149 # this needs to occur outside the for loop that handles attribute
150 # parsing so we can 'continue' the outer loop.
152 if optional
and type(params
[0]) is type(()):
153 # the attribute name isn't used in this special case
154 pushing(macroname
, "a", depth
+ len(stack
))
155 stack
.append(macroname
)
156 ofp
.write("(%s\n" % macroname
)
157 m
= _start_optional_rx
.match(line
)
159 line
= line
[m
.end():]
160 line
= subconvert(line
, ofp
, table
, discards
,
161 autoclosing
, endchar
="]",
162 depth
=depth
+ len(stack
))
165 # handle attribute mappings here:
166 for attrname
in params
:
169 if type(attrname
) is type(""):
170 m
= _optional_rx
.match(line
)
172 line
= line
[m
.end():]
173 ofp
.write("A%s TOKEN %s\n"
174 % (attrname
, encode(m
.group(1))))
175 elif type(attrname
) is type(()):
176 # This is a sub-element; but don't place the
177 # element we found on the stack (\section-like)
178 pushing(macroname
, "b", len(stack
) + depth
)
179 stack
.append(macroname
)
180 ofp
.write("(%s\n" % macroname
)
181 macroname
= attrname
[0]
182 m
= _start_group_rx
.match(line
)
184 line
= line
[m
.end():]
185 elif type(attrname
) is type([]):
186 # A normal subelement.
187 attrname
= attrname
[0]
190 ofp
.write("(%s\n" % macroname
)
191 pushing(macroname
, "c", len(stack
) + depth
)
192 ofp
.write("(%s\n" % attrname
)
193 pushing(attrname
, "sub-elem", len(stack
) + depth
+ 1)
194 line
= subconvert(skip_white(line
)[1:], ofp
, table
,
195 discards
, autoclosing
, endchar
="}",
196 depth
=depth
+ len(stack
) + 2)
197 popping(attrname
, "sub-elem", len(stack
) + depth
+ 1)
198 ofp
.write(")%s\n" % attrname
)
200 m
= _parameter_rx
.match(line
)
202 raise LaTeXFormatError(
203 "could not extract parameter %s for %s: %s"
204 % (attrname
, macroname
, `line
[:100]`
))
206 if _token_rx
.match(value
):
210 ofp
.write("A%s %s %s\n"
211 % (attrname
, dtype
, encode(value
)))
212 line
= line
[m
.end():]
213 if params
and type(params
[-1]) is type('') \
214 and (not empty
) and not environ
:
215 # attempt to strip off next '{'
216 m
= _start_group_rx
.match(line
)
218 raise LaTeXFormatError(
219 "non-empty element '%s' has no content: %s"
220 % (macroname
, line
[:12]))
221 line
= line
[m
.end():]
223 ofp
.write("(%s\n" % macroname
)
224 pushing(macroname
, "d", len(stack
) + depth
)
227 stack
.append(macroname
)
230 if line
[0] == endchar
and not stack
:
232 sys
.stderr
.write("subconvert() --> %s\n" % `line
[1:21]`
)
236 macroname
= stack
[-1]
237 conversion
= table
.get(macroname
)
239 and macroname
not in discards \
240 and type(conversion
) is not type(""):
241 # otherwise, it was just a bare group
242 ofp
.write(")%s\n" % stack
[-1])
243 popping(macroname
, "d", len(stack
) + depth
- 1)
248 pushing("", "e", len(stack
) + depth
)
252 if line
[0] == "\\" and line
[1] in ESCAPED_CHARS
:
253 ofp
.write("-%s\n" % encode(line
[1]))
256 if line
[:2] == r
"\\":
257 ofp
.write("(BREAK\n)BREAK\n")
260 m
= _text_rx
.match(line
)
262 text
= encode(m
.group())
263 ofp
.write("-%s\n" % text
)
264 line
= line
[m
.end():]
266 # special case because of \item[]
271 # avoid infinite loops
275 raise LaTeXFormatError("could not identify markup: %s%s"
276 % (`line
[:100]`
, extra
))
277 while stack
and stack
[-1] in autoclosing
:
279 ofp
.write(")%s\n" % stack
[-1])
280 popping(stack
[-1], "e", len(stack
) + depth
- 1)
283 raise LaTeXFormatError("elements remain on stack: "
284 + string
.join(stack
))
285 # otherwise we just ran out of input here...
288 def convert(ifp
, ofp
, table
={}, discards
=(), autoclosing
=()):
289 lines
= string
.split(ifp
.read(), "\n")
290 for i
in range(len(lines
)):
291 lines
[i
] = string
.rstrip(lines
[i
])
292 data
= string
.join(lines
, "\n")
294 subconvert(data
, ofp
, table
, discards
, autoclosing
)
295 except IOError, (err
, msg
):
296 if err
!= errno
.EPIPE
:
300 def skip_white(line
):
301 while line
and line
[0] in " %\n\t":
302 line
= string
.lstrip(line
[1:])
307 if len(sys
.argv
) == 2:
308 ifp
= open(sys
.argv
[1])
310 elif len(sys
.argv
) == 3:
311 ifp
= open(sys
.argv
[1])
312 ofp
= open(sys
.argv
[2], "w")
317 # entries have the form:
318 # name: ([attribute names], is1stOptional, isEmpty, isEnv, nocontent)
319 # attribute names can be:
320 # "string" -- normal attribute
321 # ("string",) -- sub-element with content of macro; like for \section
322 # ["string"] -- sub-element
323 "appendix": ([], 0, 1, 0, 0),
324 "bifuncindex": (["name"], 0, 1, 0, 0),
325 "catcode": ([], 0, 1, 0, 0),
326 "cfuncdesc": (["type", "name", ("args",)], 0, 0, 1, 0),
327 "chapter": ([("title",)], 0, 0, 0, 0),
328 "chapter*": ([("title",)], 0, 0, 0, 0),
329 "classdesc": (["name", ("args",)], 0, 0, 1, 0),
330 "ctypedesc": (["name"], 0, 0, 1, 0),
331 "cvardesc": (["type", "name"], 0, 0, 1, 0),
332 "datadesc": (["name"], 0, 0, 1, 0),
333 "declaremodule": (["id", "type", "name"], 1, 1, 0, 0),
334 "deprecated": (["release"], 0, 0, 0, 0),
335 "documentclass": (["classname"], 0, 1, 0, 0),
336 "excdesc": (["name"], 0, 0, 1, 0),
337 "funcdesc": (["name", ("args",)], 0, 0, 1, 0),
338 "funcdescni": (["name", ("args",)], 0, 0, 1, 0),
339 "funcline": (["name"], 0, 0, 0, 0),
340 "funclineni": (["name"], 0, 0, 0, 0),
341 "geq": ([], 0, 1, 0, 0),
342 "hline": ([], 0, 1, 0, 0),
343 "indexii": (["ie1", "ie2"], 0, 1, 0, 0),
344 "indexiii": (["ie1", "ie2", "ie3"], 0, 1, 0, 0),
345 "indexiv": (["ie1", "ie2", "ie3", "ie4"], 0, 1, 0, 0),
346 "indexname": ([], 0, 0, 0, 0),
347 "input": (["source"], 0, 1, 0, 0),
348 "item": ([("leader",)], 1, 0, 0, 0),
349 "label": (["id"], 0, 1, 0, 0),
350 "labelwidth": ([], 0, 1, 0, 0),
351 "LaTeX": ([], 0, 1, 0, 0),
352 "leftmargin": ([], 0, 1, 0, 0),
353 "leq": ([], 0, 1, 0, 0),
354 "lineii": ([["entry"], ["entry"]], 0, 0, 0, 1),
355 "lineiii": ([["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
356 "lineiv": ([["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 0, 1),
357 "localmoduletable": ([], 0, 1, 0, 0),
358 "makeindex": ([], 0, 1, 0, 0),
359 "makemodindex": ([], 0, 1, 0, 0),
360 "maketitle": ([], 0, 1, 0, 0),
361 "manpage": (["name", "section"], 0, 1, 0, 0),
362 "memberdesc": (["class", "name"], 1, 0, 1, 0),
363 "methoddesc": (["class", "name", ("args",)], 1, 0, 1, 0),
364 "methoddescni": (["class", "name", ("args",)], 1, 0, 1, 0),
365 "methodline": (["class", "name"], 1, 0, 0, 0),
366 "methodlineni": (["class", "name"], 1, 0, 0, 0),
367 "moduleauthor": (["name", "email"], 0, 1, 0, 0),
368 "opcodedesc": (["name", "var"], 0, 0, 1, 0),
369 "par": ([], 0, 1, 0, 0),
370 "paragraph": ([("title",)], 0, 0, 0, 0),
371 "renewcommand": (["macro"], 0, 0, 0, 0),
372 "rfc": (["num"], 0, 1, 0, 0),
373 "section": ([("title",)], 0, 0, 0, 0),
374 "sectionauthor": (["name", "email"], 0, 1, 0, 0),
375 "seemodule": (["ref", "name"], 1, 0, 0, 0),
376 "stindex": (["type"], 0, 1, 0, 0),
377 "subparagraph": ([("title",)], 0, 0, 0, 0),
378 "subsection": ([("title",)], 0, 0, 0, 0),
379 "subsubsection": ([("title",)], 0, 0, 0, 0),
380 "list": (["bullet", "init"], 0, 0, 1, 0),
381 "tableii": (["colspec", "style",
382 ["entry"], ["entry"]], 0, 0, 1, 0),
383 "tableiii": (["colspec", "style",
384 ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
385 "tableiv": (["colspec", "style",
386 ["entry"], ["entry"], ["entry"], ["entry"]], 0, 0, 1, 0),
387 "version": ([], 0, 1, 0, 0),
388 "versionadded": (["version"], 0, 1, 0, 0),
389 "versionchanged": (["version"], 0, 1, 0, 0),
390 "withsubitem": (["text"], 0, 0, 0, 0),
392 "ABC": ([], 0, 1, 0, 0),
393 "ASCII": ([], 0, 1, 0, 0),
394 "C": ([], 0, 1, 0, 0),
395 "Cpp": ([], 0, 1, 0, 0),
396 "EOF": ([], 0, 1, 0, 0),
397 "e": ([], 0, 1, 0, 0),
398 "ldots": ([], 0, 1, 0, 0),
399 "NULL": ([], 0, 1, 0, 0),
400 "POSIX": ([], 0, 1, 0, 0),
401 "UNIX": ([], 0, 1, 0, 0),
403 # Things that will actually be going away!
405 "fi": ([], 0, 1, 0, 0),
406 "ifhtml": ([], 0, 1, 0, 0),
407 "makeindex": ([], 0, 1, 0, 0),
408 "makemodindex": ([], 0, 1, 0, 0),
409 "maketitle": ([], 0, 1, 0, 0),
410 "noindent": ([], 0, 1, 0, 0),
411 "protect": ([], 0, 1, 0, 0),
412 "tableofcontents": ([], 0, 1, 0, 0),
414 discards
=["fi", "ifhtml", "makeindex", "makemodindex", "maketitle",
415 "noindent", "tableofcontents"],
416 autoclosing
=["chapter", "section", "subsection", "subsubsection",
417 "paragraph", "subparagraph", ])
420 if __name__
== "__main__":