2 # partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
3 # and generate texinfo source.
5 # This is *not* a good example of good programming practices. In fact, this
# file could use a complete rewrite, in order to become faster, more
# easily extensible and maintainable.
# However, I added some comments in a few places for the pitiful person who
# would ever need to take a look into this file.
12 # Have I been clear enough??
17 import sys
, string
, regex
, getopt
, os
19 # Different parse modes for phase 1
28 the_modes
= MODE_REGULAR
, MODE_VERBATIM
, MODE_CS_SCAN
, MODE_COMMENT
, \
29 MODE_MATH
, MODE_DMATH
, MODE_GOBBLEWHITE
31 # Show the neighbourhood of the scanned buffer
32 def epsilon(buf
, where
):
33 wmt
, wpt
= where
- 10, where
+ 10
38 return ' Context ' + `buf
[wmt
:where
]`
+ '.' + `buf
[where
:wpt
]`
+ '.'
40 # Should return the line number. never worked
43 return ' Line ' + `lineno`
+ '.'
45 # Displays the recursion level.
47 return ' Level ' + `lvl`
+ '.'
49 # Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    # Build the diagnostic suffix that is appended to error messages:
    # the recursion-level text, the line text and the buffer-context text,
    # concatenated in that order.
    #   lvl:   recursion level, handed to lv()
    #   buf:   the scanned buffer, handed to epsilon()
    #   where: position in buf, handed to epsilon()
    # Returns the three helper results joined into one string.
    level_part = lv(lvl)
    line_part = lin()
    context_part = epsilon(buf, where)
    return level_part + line_part + context_part
54 # This class is only needed for _symbolic_ representation of the parse mode.
57 if arg
not in the_modes
:
58 raise ValueError, 'mode not in the_modes'
62 def __cmp__(self
, other
):
63 if type(self
) != type(other
):
65 return cmp(self
.mode
, other
.mode
)
68 if self
.mode
== MODE_REGULAR
:
70 elif self
.mode
== MODE_VERBATIM
:
71 return 'MODE_VERBATIM'
72 elif self
.mode
== MODE_CS_SCAN
:
74 elif self
.mode
== MODE_COMMENT
:
76 elif self
.mode
== MODE_MATH
:
78 elif self
.mode
== MODE_DMATH
:
80 elif self
.mode
== MODE_GOBBLEWHITE
:
81 return 'MODE_GOBBLEWHITE'
83 raise ValueError, 'mode not in the_modes'
85 # just a wrapper around a class initialisation
87 return Mode().init(arg
)
90 # After phase 1, the text consists of chunks, with a certain type
91 # this type will be assigned to the chtype member of the chunk
92 # the where-field contains the file position where this is found
# and the data field contains (1): a tuple describing start and end
94 # positions of the substring (can be used as slice for the buf-variable),
95 # (2) just a string, mostly generated by the changeit routine,
96 # or (3) a list, describing a (recursive) subgroup of chunks
# Chunk type codes, numbered consecutively from 0:
#
#    0  PLAIN         assume plaintext, data = the text
#    1  GROUP         group ({}), data = [chunk, chunk,..]
#    2  CSNAME        control seq token, data = the command
#    3  COMMENT       data is the actual comment
#    4  DMATH         displaymath, data = [chunk, chunk,..]
#    5  MATH          math, see DMATH
#    6  OTHER         char with catcode other, data = char
#    7  ACTIVE        active char
#    8  GOBBLEDWHITE  gobbled LWSP, after CSNAME
#    9  ENDLINE       end-of-line, data = '\n'
#   10  DENDLINE      double EOL, data = '\n', indicates \par
#   11  ENV           LaTeX-environment, data = (envname, [ch, ch, ch,.])
#   12  CSLINE        for texi: next chunk will be one group of args,
#                     which will be set all on 1 line
#   13  IGNORE        ignore this data
#   14  ENDENV        temp end of group indicator
#   15  IF            IF-directive, data = (flag, negate, [ch, ch, ch,...])

# every valid chunk type code, in numeric order
the_types = tuple(range(16))

# bind the symbolic names to their codes, position by position
(PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
 GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV,
 IF) = the_types
119 # class, just to display symbolic name
121 def init(self
, chunk_type
):
122 if chunk_type
not in the_types
:
123 raise 'ValueError', 'chunk_type not in the_types'
124 self
.chunk_type
= chunk_type
def __cmp__(self, other):
    # Three-way comparison on the stored chunk type code.
    # When the other operand's type() differs from this object's, it is
    # first wrapped through chunk_type() so that both sides carry a
    # chunk_type attribute.
    # Returns whatever cmp() yields for the two codes.
    rhs = other
    if type(rhs) != type(self):
        rhs = chunk_type(rhs)
    return cmp(self.chunk_type, rhs.chunk_type)
133 if self
.chunk_type
== PLAIN
:
135 elif self
.chunk_type
== GROUP
:
137 elif self
.chunk_type
== CSNAME
:
139 elif self
.chunk_type
== COMMENT
:
141 elif self
.chunk_type
== DMATH
:
143 elif self
.chunk_type
== MATH
:
145 elif self
.chunk_type
== OTHER
:
147 elif self
.chunk_type
== ACTIVE
:
149 elif self
.chunk_type
== GOBBLEDWHITE
:
150 return 'GOBBLEDWHITE'
151 elif self
.chunk_type
== DENDLINE
:
153 elif self
.chunk_type
== ENDLINE
:
155 elif self
.chunk_type
== ENV
:
157 elif self
.chunk_type
== CSLINE
:
159 elif self
.chunk_type
== IGNORE
:
161 elif self
.chunk_type
== ENDENV
:
163 elif self
.chunk_type
== IF
:
166 raise ValueError, 'chunk_type not in the_types'
def chunk_type(type):
    # Factory wrapper: create a ChunkType and initialise it with the
    # given code.  The result of ChunkType.init() is passed back as is.
    wrapper = ChunkType()
    return wrapper.init(type)
172 # store a type object of the ChunkType-class-instance...
173 chunk_type_type
= type(chunk_type(0))
175 # this class contains a part of the parsed buffer
177 def init(self
, chtype
, where
, data
):
178 if type(chtype
) != chunk_type_type
:
179 chtype
= chunk_type(chtype
)
181 if type(where
) != type(0):
182 raise TypeError, '\'where\' is not a number'
185 ##print 'CHUNK', self
189 return 'chunk' + `self
.chtype
, self
.where
, self
.data`
def chunk(chtype, where, data):
    # Factory wrapper: create a Chunk and initialise it.
    #   chtype: the chunk type (init() accepts a raw code or a
    #           chunk_type() object)
    #   where:  position of the chunk; init() insists on a number
    #   data:   payload of the chunk
    # The result of Chunk.init() is passed back as is.
    fresh = Chunk()
    return fresh.init(chtype, where, data)
197 error
= 'partparse.error'
239 # Show a list of catcode-name-symbols
243 result
= result
+ cc_names
[i
] + ', '
244 return '[' + result
[:-2] + ']'
246 # the name of the catcode (ACTIVE, OTHER, etc.)
248 return cc_names
[code
]
251 # Which catcodes make the parser stop parsing regular plaintext
252 regular_stopcodes
= [CC_ESCAPE
, CC_LBRACE
, CC_RBRACE
, CC_MATHSHIFT
, \
253 CC_ALIGNMENT
, CC_PARAMETER
, CC_SUPERSCRIPT
, CC_SUBSCRIPT
, \
254 CC_IGNORE
, CC_ACTIVE
, CC_COMMENT
, CC_INVALID
, CC_ENDLINE
]
256 # same for scanning a control sequence name
257 csname_scancodes
= [CC_LETTER
]
259 # same for gobbling LWSP
260 white_scancodes
= [CC_WHITE
]
261 ##white_scancodes = [CC_WHITE, CC_ENDLINE]
263 # make a list of all catcode id's, except for catcode ``other''
264 all_but_other_codes
= range(16)
265 del all_but_other_codes
[CC_OTHER
]
266 ##print all_but_other_codes
268 # when does a comment end
269 comment_stopcodes
= [CC_ENDLINE
]
271 # gather all characters together, specified by a list of catcodes
272 def code2string(cc
, codelist
):
273 ##print 'code2string: codelist = ' + pcl(codelist),
275 for category
in codelist
:
277 result
= result
+ cc
[category
]
278 ##print 'result = ' + `result`
# automatically generate all characters of catcode other, being the
# complement set over the character codes scanned below (currently 256)
283 def make_other_codes(cc
):
284 otherchars
= range(256) # could be made 256, no problem
285 for category
in all_but_other_codes
:
287 for c
in cc
[category
]:
288 otherchars
[ord(c
)] = None
292 result
= result
+ chr(i
)
295 # catcode dump (which characters have which catcodes).
296 def dump_cc(name
, cc
):
298 ##print '=' * (8+len(name))
300 raise TypeError, 'cc not good cat class'
301 ## for i in range(16):
302 ## print pc(i) + '\t' + `cc[i]`
305 # In the beginning,....
306 epoch_cc
= [None] * 16
307 ##dump_cc('epoch_cc', epoch_cc)
311 initex_cc
= epoch_cc
[:]
312 initex_cc
[CC_ESCAPE
] = '\\'
313 initex_cc
[CC_ENDLINE
], initex_cc
[CC_IGNORE
], initex_cc
[CC_WHITE
] = \
315 initex_cc
[CC_LETTER
] = string
.uppercase
+ string
.lowercase
316 initex_cc
[CC_COMMENT
], initex_cc
[CC_INVALID
] = '%', '\x7F'
317 #initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
318 ##dump_cc('initex_cc', initex_cc)
321 # LPLAIN: LaTeX catcode setting (see lplain.tex)
322 lplain_cc
= initex_cc
[:]
323 lplain_cc
[CC_LBRACE
], lplain_cc
[CC_RBRACE
] = '{', '}'
324 lplain_cc
[CC_MATHSHIFT
] = '$'
325 lplain_cc
[CC_ALIGNMENT
] = '&'
326 lplain_cc
[CC_PARAMETER
] = '#'
327 lplain_cc
[CC_SUPERSCRIPT
] = '^\x0B' # '^' and C-k
328 lplain_cc
[CC_SUBSCRIPT
] = '_\x01' # '_' and C-a
329 lplain_cc
[CC_WHITE
] = lplain_cc
[CC_WHITE
] + '\t'
330 lplain_cc
[CC_ACTIVE
] = '~\x0C' # '~' and C-l
331 lplain_cc
[CC_OTHER
] = make_other_codes(lplain_cc
)
332 ##dump_cc('lplain_cc', lplain_cc)
335 # Guido's LaTeX environment catcoded '_' as ``other''
336 # my own purpose catlist
338 my_cc
[CC_SUBSCRIPT
] = my_cc
[CC_SUBSCRIPT
][1:] # remove '_' here
339 my_cc
[CC_OTHER
] = my_cc
[CC_OTHER
] + '_' # add it to OTHER list
340 dump_cc('my_cc', my_cc
)
344 # needed for un_re, my equivalent for regexp-quote in Emacs
345 re_meaning
= '\\[]^$'
351 result
= result
+ '\\'
355 # NOTE the negate ('^') operator in *some* of the regexps below
def make_rc_regular(cc):
    # Compile the pattern used while scanning regular text: a bracket
    # expression holding every character that cc assigns to one of the
    # catcodes in regular_stopcodes.
    #   cc: catcode table, indexed by catcode
    # problems here if '[]' are included!!
    stop_chars = code2string(cc, regular_stopcodes)
    return regex.compile('[' + stop_chars + ']')
def make_rc_cs_scan(cc):
    # Compile the pattern used while scanning a control sequence name:
    # a negated ('^') bracket expression over the characters that cc
    # assigns to the catcodes in csname_scancodes.
    #   cc: catcode table, indexed by catcode
    name_chars = code2string(cc, csname_scancodes)
    return regex.compile('[^' + name_chars + ']')
def make_rc_comment(cc):
    # Compile the pattern that finds the end of a comment: a bracket
    # expression over the characters that cc assigns to the catcodes in
    # comment_stopcodes.
    #   cc: catcode table, indexed by catcode
    end_chars = code2string(cc, comment_stopcodes)
    return regex.compile('[' + end_chars + ']')
def make_rc_endwhite(cc):
    # Compile the pattern that finds the end of a run of white space:
    # a negated ('^') bracket expression over the characters that cc
    # assigns to the catcodes in white_scancodes.
    #   cc: catcode table, indexed by catcode
    white_chars = code2string(cc, white_scancodes)
    return regex.compile('[^' + white_chars + ']')
371 # regular: normal mode:
372 rc_regular
= make_rc_regular(my_cc
)
374 # scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
375 rc_cs_scan
= make_rc_cs_scan(my_cc
)
376 rc_comment
= make_rc_comment(my_cc
)
377 rc_endwhite
= make_rc_endwhite(my_cc
)
380 # parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
# RECURSION-LEVEL is incremented on entry.
382 # result contains the list of chunks returned
383 # together with this list, the buffer position is returned
385 # RECURSION-LEVEL will be set to zero *again*, when recursively a
# {,D}MATH-mode scan has been entered.
387 # This has been done in order to better check for environment-mismatches
389 def parseit(buf
, *rest
):
393 parsemode
, start
, lvl
= rest
395 parsemode
, start
, lvl
= rest
+ (0, )
397 parsemode
, start
, lvl
= rest
+ (0, 0)
399 parsemode
, start
, lvl
= mode(MODE_REGULAR
), 0, 0
401 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
404 if lvl
== 0 and parsemode
== mode(MODE_REGULAR
):
408 ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
411 # some of the more regular modes...
414 if parsemode
in (mode(MODE_REGULAR
), mode(MODE_DMATH
), mode(MODE_MATH
)):
420 #print '\tnew round: ' + epsilon(buf, where)
422 if lvl
> 1 or curpmode
!= mode(MODE_REGULAR
):
423 # not the way we started...
424 raise EOFError, 'premature end of file.' + lle(lvl
, buf
, where
)
425 # the real ending of lvl-1 parse
428 pos
= rc_regular
.search(buf
, where
)
434 newpos
, c
= pos
, chunk(PLAIN
, where
, (where
, pos
))
440 # ok, pos == where and pos != end
442 foundchar
= buf
[where
]
443 if foundchar
in my_cc
[CC_LBRACE
]:
444 # recursive subgroup parse...
445 newpos
, data
= parseit(buf
, curpmode
, where
+1, lvl
)
446 result
.append(chunk(GROUP
, where
, data
))
448 elif foundchar
in my_cc
[CC_RBRACE
]:
450 raise error
, 'ENDGROUP while in base level.' + lle(lvl
, buf
, where
)
451 if lvl
== 1 and mode
!= mode(MODE_REGULAR
):
452 raise error
, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
453 return where
+ 1, result
455 elif foundchar
in my_cc
[CC_ESCAPE
]:
457 # call the routine that actually deals with
# this problem. If do_ret is not None, then
# return the value of do_ret
461 # Note that handle_cs might call this routine
462 # recursively again...
464 do_ret
, newpos
= handlecs(buf
, where
, \
465 curpmode
, lvl
, result
, end
)
469 elif foundchar
in my_cc
[CC_COMMENT
]:
470 newpos
, data
= parseit(buf
, \
471 mode(MODE_COMMENT
), where
+1, lvl
)
472 result
.append(chunk(COMMENT
, where
, data
))
474 elif foundchar
in my_cc
[CC_MATHSHIFT
]:
475 # note that recursive calls to math-mode
476 # scanning are called with recursion-level 0
477 # again, in order to check for bad mathend
479 if where
+ 1 != end
and \
483 # double mathshift, e.g. '$$'
485 if curpmode
== mode(MODE_REGULAR
):
486 newpos
, data
= parseit(buf
, \
489 result
.append(chunk(DMATH
, \
491 elif curpmode
== mode(MODE_MATH
):
492 raise error
, 'wrong math delimiiter' + lin() + epsilon(buf
, where
)
494 raise error
, 'bad mathend.' + \
497 return where
+ 2, result
500 # single math shift, e.g. '$'
502 if curpmode
== mode(MODE_REGULAR
):
503 newpos
, data
= parseit(buf
, \
506 result
.append(chunk(MATH
, \
508 elif curpmode
== mode(MODE_DMATH
):
509 raise error
, 'wrong math delimiiter' + lin() + epsilon(buf
, where
)
511 raise error
, 'bad mathend.' + \
514 return where
+ 1, result
516 elif foundchar
in my_cc
[CC_IGNORE
]:
517 print 'warning: ignored char', `foundchar`
520 elif foundchar
in my_cc
[CC_ACTIVE
]:
521 result
.append(chunk(ACTIVE
, where
, foundchar
))
524 elif foundchar
in my_cc
[CC_INVALID
]:
525 raise error
, 'invalid char ' + `foundchar`
528 elif foundchar
in my_cc
[CC_ENDLINE
]:
530 # after an end of line, eat the rest of
531 # whitespace on the beginning of the next line
532 # this is what LaTeX more or less does
534 # also, try to indicate double newlines (\par)
538 newpos
, dummy
= parseit(buf
, mode(MODE_GOBBLEWHITE
), where
+ 1, lvl
)
539 if newpos
!= end
and buf
[newpos
] in \
541 result
.append(chunk(DENDLINE
, \
542 savedwhere
, foundchar
))
544 result
.append(chunk(ENDLINE
, \
545 savedwhere
, foundchar
))
547 result
.append(chunk(OTHER
, where
, foundchar
))
550 elif parsemode
== mode(MODE_CS_SCAN
):
552 # scan for a control sequence token. `\ape', `\nut' or `\%'
555 raise EOFError, 'can\'t find end of csname'
556 pos
= rc_cs_scan
.search(buf
, start
)
560 # first non-letter right where we started the search
561 # ---> the control sequence name consists of one single
562 # character. Also: don't eat white space...
563 if buf
[pos
] in my_cc
[CC_ENDLINE
]:
566 return pos
, (start
, pos
)
572 pos2
, dummy
= parseit(buf
, \
573 mode(MODE_GOBBLEWHITE
), spos
, lvl
)
574 return pos2
, (start
, pos
)
576 elif parsemode
== mode(MODE_GOBBLEWHITE
):
579 pos
= rc_endwhite
.search(buf
, start
)
582 return pos
, (start
, pos
)
584 elif parsemode
== mode(MODE_COMMENT
):
585 pos
= rc_comment
.search(buf
, start
)
588 print 'no newline perhaps?'
589 raise EOFError, 'can\'t find end of comment'
591 pos2
, dummy
= parseit(buf
, mode(MODE_GOBBLEWHITE
), pos
, lvl
)
592 return pos2
, (start
, pos
)
596 raise error
, 'Unknown mode (' + `parsemode`
+ ')'
599 #moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
601 #boxcommands = 'mbox', 'fbox'
602 #defcommands = 'def', 'newcommand'
604 endverbstr
= '\\end{verbatim}'
606 re_endverb
= regex
.compile(un_re(endverbstr
))
609 # handlecs: helper function for parseit, for the special thing we might
610 # wanna do after certain command control sequences
611 # returns: None or return_data, newpos
613 # in the latter case, the calling function is instructed to immediately
614 # return with the data in return_data
616 def handlecs(buf
, where
, curpmode
, lvl
, result
, end
):
619 # get the control sequence name...
620 newpos
, data
= parseit(buf
, mode(MODE_CS_SCAN
), where
+1, lvl
)
623 if s(buf
, data
) in ('begin', 'end'):
624 # skip the expected '{' and get the LaTeX-envname '}'
625 newpos
, data
= parseit(buf
, mode(MODE_REGULAR
), newpos
+1, lvl
)
627 raise error
, 'expected 1 chunk of data.' + \
630 # yucky, we've got an environment
631 envname
= s(buf
, data
[0].data
)
632 ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
633 if s(buf
, saveddata
) == 'begin' and envname
== 'verbatim':
634 # verbatim deserves special treatment
635 pos
= re_endverb
.search(buf
, newpos
)
637 raise error
, `endverbstr`
+ ' not found.' + lle(lvl
, buf
, where
)
638 result
.append(chunk(ENV
, where
, (envname
, [chunk(PLAIN
, newpos
, (newpos
, pos
))])))
639 newpos
= pos
+ len(endverbstr
)
641 elif s(buf
, saveddata
) == 'begin':
642 # start parsing recursively... If that parse returns
643 # from an '\end{...}', then should the last item of
644 # the returned data be a string containing the ended
646 newpos
, data
= parseit(buf
, curpmode
, newpos
, lvl
)
647 if not data
or type(data
[-1]) != type(''):
648 raise error
, 'missing \'end\'' + lle(lvl
, buf
, where
) + epsilon(buf
, newpos
)
651 if retenv
!= envname
:
652 #[`retenv`, `envname`]
653 raise error
, 'environments do not match.' + \
654 lle(lvl
, buf
, where
) + \
656 result
.append(chunk(ENV
, where
, (retenv
, data
)))
658 # 'end'... append the environment name, as just
# pointed out, and order parseit to return...
660 result
.append(envname
)
661 ##print 'POINT of return: ' + epsilon(buf, newpos)
662 # the tuple will be returned by parseit
663 return (newpos
, result
), newpos
665 # end of \begin ... \end handling
667 elif s(buf
, data
)[0:2] == 'if':
668 # another scary monster: the 'if' directive
669 flag
= s(buf
, data
)[2:]
671 # recursively call parseit, just like environment above..
672 # the last item of data should contain the if-termination
673 # e.g., 'else' of 'fi'
674 newpos
, data
= parseit(buf
, curpmode
, newpos
, lvl
)
675 if not data
or data
[-1] not in ('else', 'fi'):
676 raise error
, 'wrong if... termination' + \
677 lle(lvl
, buf
, where
) + epsilon(buf
, newpos
)
681 # 0 means dont_negate flag
682 result
.append(chunk(IF
, where
, (flag
, 0, data
)))
684 # do the whole thing again, there is only one way
685 # to end this one, by 'fi'
686 newpos
, data
= parseit(buf
, curpmode
, newpos
, lvl
)
687 if not data
or data
[-1] not in ('fi', ):
688 raise error
, 'wrong if...else... termination' \
689 + lle(lvl
, buf
, where
) \
690 + epsilon(buf
, newpos
)
694 result
.append(chunk(IF
, where
, (flag
, 1, data
)))
695 #done implicitely: return None, newpos
697 elif s(buf
, data
) in ('else', 'fi'):
698 result
.append(s(buf
, data
))
699 # order calling party to return tuple
700 return (newpos
, result
), newpos
702 # end of \if, \else, ... \fi handling
704 elif s(buf
, saveddata
) == 'verb':
706 result
.append(chunk(CSNAME
, where
, data
))
708 raise error
, 'premature end of command.' + lle(lvl
, buf
, where
)
710 ##print 'VERB: delimchar ' + `delimchar`
711 pos
= regex
.compile(un_re(delimchar
)).search(buf
, x2
+ 1)
713 raise error
, 'end of \'verb\' argument (' + \
714 `delimchar`
+ ') not found.' + \
716 result
.append(chunk(GROUP
, x2
, [chunk(PLAIN
, x2
+1, (x2
+1, pos
))]))
719 result
.append(chunk(CSNAME
, where
, data
))
# this is just a function to get the string value of the possible data-tuple
724 if type(data
) == type(''):
726 if len(data
) != 2 or not (type(data
[0]) == type(data
[1]) == type(0)):
727 raise TypeError, 'expected tuple of 2 integers'
732 ##length, data1, i = getnextarg(length, buf, pp, i + 1)
734 # make a deep-copy of some chunks
738 result
.append(chunkcopy(x
))
743 # copy a chunk, would better be a method of class Chunk...
745 if ch
.chtype
== chunk_type(GROUP
):
747 for i
in range(len(listc
)):
748 listc
[i
] = chunkcopy(listc
[i
])
749 return chunk(GROUP
, ch
.where
, listc
)
751 return chunk(ch
.chtype
, ch
.where
, ch
.data
)
754 # get next argument for TeX-macro, flatten a group (insert between)
755 # or return Command Sequence token, or give back one character
756 def getnextarg(length
, buf
, pp
, item
):
758 ##wobj = Wobj().init()
759 ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
760 ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
762 while item
< length
and pp
[item
].chtype
== chunk_type(ENDLINE
):
766 raise error
, 'no next arg.' + epsilon(buf
, pp
[-1].where
)
767 if pp
[item
].chtype
== chunk_type(GROUP
):
768 newpp
= pp
[item
].data
772 length
= length
+ len(newpp
)
773 pp
[item
:item
] = newpp
774 item
= item
+ len(newpp
)
777 dumpit(buf
, wobj
.write
, newpp
)
778 ##print 'GETNEXTARG: inserted ' + `wobj.data`
780 elif pp
[item
].chtype
== chunk_type(PLAIN
):
782 print 'WARNING: grabbing one char'
783 if len(s(buf
, pp
[item
].data
)) > 1:
784 pp
.insert(item
, chunk(PLAIN
, pp
[item
].where
, s(buf
, pp
[item
].data
)[:1]))
785 item
, length
= item
+1, length
+1
786 pp
[item
].data
= s(buf
, pp
[item
].data
)[1:]
792 str = `
s(buf
, ch
.data
)`
796 str = str[:400] + '...'
797 print 'GETNEXTARG:', ch
.chtype
, 'not handled, data ' + str
801 # this one is needed to find the end of LaTeX's optional argument, like
803 re_endopt
= regex
.compile(']')
805 # get a LaTeX-optional argument, you know, the square braces '[' and ']'
806 def getoptarg(length
, buf
, pp
, item
):
809 dumpit(buf
, wobj
.write
, pp
[item
:min(length
, item
+ 5)])
810 ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
812 if item
>= length
or \
813 pp
[item
].chtype
!= chunk_type(PLAIN
) or \
814 s(buf
, pp
[item
].data
)[0] != '[':
817 pp
[item
].data
= s(buf
, pp
[item
].data
)[1:]
818 if len(pp
[item
].data
) == 0:
824 raise error
, 'No end of optional arg found'
825 if pp
[item
].chtype
== chunk_type(PLAIN
):
826 text
= s(buf
, pp
[item
].data
)
827 pos
= re_endopt
.search(text
)
829 pp
[item
].data
= text
[:pos
]
837 while text
and text
[0] in ' \t':
841 pp
.insert(item
, chunk(PLAIN
, 0, text
))
848 # Wobj just add write-requests to the ``data'' attribute
def write(self, data):
    # File-like write(): add data to what has been collected so far in
    # self.data.  The combined value is built by concatenation and then
    # rebound to self.data; nothing is returned.
    collected = self.data
    self.data = collected + data
856 # ignore these commands
857 ignoredcommands
= ('bcode', 'ecode', 'optional')
858 # map commands like these to themselves as plaintext
859 wordsselves
= ('UNIX', 'ABC', 'C', 'ASCII', 'EOF')
860 # \{ --> {, \} --> }, etc
861 themselves
= ('{', '}', '.', '@') + wordsselves
862 # these ones also themselves (see argargs macro in myformat.sty)
863 inargsselves
= (',', '[', ']', '(', ')')
864 # this is how *I* would show the difference between emph and strong
865 # code 1 means: fold to uppercase
866 markcmds
= {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), \
867 'strong': ('*', '*')}
# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
870 fontchanges
= {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}
872 # transparent for these commands
873 for_texi
= ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp', \
877 # try to remove macros and return flat text
878 def flattext(buf
, pp
):
880 ##print '---> FLATTEXT ' + `pp`
883 i
, length
= 0, len(pp
)
885 if len(pp
) != length
:
886 raise 'FATAL', 'inconsistent length'
891 if ch
.chtype
== chunk_type(PLAIN
):
893 elif ch
.chtype
== chunk_type(CSNAME
):
894 if s(buf
, ch
.data
) in themselves
or hist
.inargs
and s(buf
, ch
.data
) in inargsselves
:
895 ch
.chtype
= chunk_type(PLAIN
)
896 elif s(buf
, ch
.data
) == 'e':
897 ch
.chtype
= chunk_type(PLAIN
)
899 elif len(s(buf
, ch
.data
)) == 1 \
900 and s(buf
, ch
.data
) in onlylatexspecial
:
901 ch
.chtype
= chunk_type(PLAIN
)
902 # if it is followed by an empty group,
903 # remove that group, it was needed for
906 and pp
[i
].chtype
==chunk_type(GROUP
) \
907 and len(pp
[i
].data
) == 0:
911 elif s(buf
, ch
.data
) in markcmds
.keys():
912 length
, newi
= getnextarg(length
, buf
, pp
, i
)
913 str = flattext(buf
, pp
[i
:newi
])
915 length
= length
- (newi
- i
)
916 ch
.chtype
= chunk_type(PLAIN
)
917 markcmd
= s(buf
, ch
.data
)
918 x
= markcmds
[markcmd
]
919 if type(x
) == type(()):
923 str = string
.upper(str)
925 raise 'FATAL', 'corrupt markcmds'
928 if s(buf
, ch
.data
) not in ignoredcommands
:
929 print 'WARNING: deleting command ' + `
s(buf
, ch
.data
)`
930 print 'PP' + `pp
[i
-1]`
932 i
, length
= i
-1, length
-1
933 elif ch
.chtype
== chunk_type(GROUP
):
934 length
, newi
= getnextarg(length
, buf
, pp
, i
-1)
936 ## str = flattext(buf, crcopy(pp[i-1:newi]))
938 ## length = length - (newi - i)
939 ## ch.chtype = chunk_type(PLAIN)
944 dumpit(buf
, wobj
.write
, pp
)
945 ##print 'FLATTEXT: RETURNING ' + `wobj.data`
948 # try to generate node names (a bit shorter than the chapter title)
# note that the \nodename command (see elsewhere) overrules these efforts
950 def invent_node_names(text
):
951 words
= string
.split(text
)
953 ##print 'WORDS ' + `words`
956 and string
.lower(words
[0]) == 'built-in' \
957 and string
.lower(words
[1]) not in ('modules', 'functions'):
959 if len(words
) == 3 and string
.lower(words
[1]) == 'module':
961 if len(words
) == 3 and string
.lower(words
[1]) == 'object':
962 return string
.join(words
[0:2])
963 if len(words
) > 4 and string
.lower(string
.join(words
[-4:])) == \
964 'methods and data attributes':
965 return string
.join(words
[:2])
968 re_commas_etc
= regex
.compile('[,`\'@{}]')
970 re_whitespace
= regex
.compile('[ \t]*')
973 ##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
975 # look if the next non-white stuff is also a command, resulting in skipping
976 # double endlines (DENDLINE) too, and thus omitting \par's
977 # Sometimes this is too much, maybe consider DENDLINE's as stop
978 def next_command_p(length
, buf
, pp
, i
, cmdname
):
985 if ch
.chtype
== chunk_type(ENDLINE
):
987 if ch
.chtype
== chunk_type(DENDLINE
):
989 if ch
.chtype
== chunk_type(PLAIN
):
990 if re_whitespace
.search(s(buf
, ch
.data
)) == 0 and \
991 re_whitespace
.match(s(buf
, ch
.data
)) == len(s(buf
, ch
.data
)):
994 if ch
.chtype
== chunk_type(CSNAME
):
995 if s(buf
, ch
.data
) == cmdname
:
996 return i
# _after_ the command
1001 # things that are special to LaTeX, but not to texi..
1002 onlylatexspecial
= '_~^$#&%'
1016 hist
.enumeratenesting
, hist
.itemizenesting
= 0, 0
1018 out
.doublenodes
= []
1019 out
.doublecindeces
= []
1022 spacech
= [chunk(PLAIN
, 0, ' ')]
1023 commach
= [chunk(PLAIN
, 0, ', ')]
1024 cindexch
= [chunk(CSLINE
, 0, 'cindex')]
1026 # the standard variation in symbols for itemize
1027 itemizesymbols
= ['bullet', 'minus', 'dots']
1029 # same for enumerate
1030 enumeratesymbols
= ['1', 'A', 'a']
1033 ## \begin{ {func,data,exc}desc }{name}...
1034 ## the resulting texi-code is dependent on the contents of indexsubitem
1037 # indexsubitem: `['XXX', 'function']
1039 # deffn {`idxsi`} NAME (FUNCARGS)
1041 # indexsubitem: `['XXX', 'method']`
1043 # defmethod {`idxsi[0]`} NAME (FUNCARGS)
1045 # indexsubitem: `['in', 'module', 'MODNAME']'
1047 # defcv data {`idxsi[1:]`} NAME
1049 # defcv exception {`idxsi[1:]`} NAME
1051 # deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
1053 # indexsubitem: `['OBJECT', 'attribute']'
1055 # defcv attribute {`OBJECT`} NAME
1058 ## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1059 ## or \funcline{NAME}{ARGS}
1061 def do_funcdesc(length
, buf
, pp
, i
):
1065 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1066 funcname
= chunk(GROUP
, wh
, pp
[i
:newi
])
1068 length
= length
- (newi
-i
)
1071 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1074 the_args
= [chunk(PLAIN
, wh
, '()'[0])] + \
1076 [chunk(PLAIN
, wh
, '()'[1])]
1078 length
= length
- (newi
-i
)
1080 idxsi
= hist
.indexsubitem
# words
1083 if idxsi
and idxsi
[-1] in ('method', 'attribute'):
1084 command
= 'defmethod'
1085 cat_class
= string
.join(idxsi
[:-1])
1086 elif len(idxsi
) == 2 and idxsi
[1] == 'function':
1088 cat_class
= string
.join(idxsi
)
1089 elif len(idxsi
) == 3 and idxsi
[:2] == ['in', 'module']:
1091 cat_class
= 'function of ' + string
.join(idxsi
[1:])
1094 raise error
, 'don\'t know what to do with indexsubitem ' + `idxsi`
1096 ch
.chtype
= chunk_type(CSLINE
)
1099 cslinearg
= [chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, cat_class
)])]
1100 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1101 cslinearg
.append(funcname
)
1102 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1104 cslinearg
[l
:l
] = the_args
1106 pp
.insert(i
, chunk(GROUP
, wh
, cslinearg
))
1107 i
, length
= i
+1, length
+1
1108 hist
.command
= command
1112 ## this routine will be called on \begin{excdesc}{NAME}
1113 ## or \excline{NAME}
1115 def do_excdesc(length
, buf
, pp
, i
):
1119 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1120 excname
= chunk(GROUP
, wh
, pp
[i
:newi
])
1122 length
= length
- (newi
-i
)
1124 idxsi
= hist
.indexsubitem
# words
1128 if len(idxsi
) == 2 and idxsi
[1] == 'exception':
1130 cat_class
= string
.join(idxsi
)
1131 elif len(idxsi
) == 3 and idxsi
[:2] == ['in', 'module']:
1133 cat_class
= 'exception'
1134 class_class
= string
.join(idxsi
[1:])
1135 elif len(idxsi
) == 4 and idxsi
[:3] == ['exception', 'in', 'module']:
1137 cat_class
= 'exception'
1138 class_class
= string
.join(idxsi
[2:])
1142 raise error
, 'don\'t know what to do with indexsubitem ' + `idxsi`
1144 ch
.chtype
= chunk_type(CSLINE
)
1147 cslinearg
= [chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, cat_class
)])]
1148 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1150 cslinearg
.append(chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, class_class
)]))
1151 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1152 cslinearg
.append(excname
)
1154 pp
.insert(i
, chunk(GROUP
, wh
, cslinearg
))
1155 i
, length
= i
+1, length
+1
1156 hist
.command
= command
1159 ## same for datadesc or dataline...
1160 def do_datadesc(length
, buf
, pp
, i
):
1164 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1165 dataname
= chunk(GROUP
, wh
, pp
[i
:newi
])
1167 length
= length
- (newi
-i
)
1169 idxsi
= hist
.indexsubitem
# words
1173 if idxsi
[-1] == 'attribute':
1175 cat_class
= 'attribute'
1176 class_class
= string
.join(idxsi
[:-1])
1177 elif len(idxsi
) == 3 and idxsi
[:2] == ['in', 'module']:
1180 class_class
= string
.join(idxsi
[1:])
1181 elif len(idxsi
) == 4 and idxsi
[:3] == ['data', 'in', 'module']:
1184 class_class
= string
.join(idxsi
[2:])
1188 raise error
, 'don\'t know what to do with indexsubitem ' + `idxsi`
1190 ch
.chtype
= chunk_type(CSLINE
)
1193 cslinearg
= [chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, cat_class
)])]
1194 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1196 cslinearg
.append(chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, class_class
)]))
1197 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1198 cslinearg
.append(dataname
)
1200 pp
.insert(i
, chunk(GROUP
, wh
, cslinearg
))
1201 i
, length
= i
+1, length
+1
1202 hist
.command
= command
1206 # regular indices: those that are not set in tt font by default....
1207 regindices
= ('cindex', )
1209 # remove illegal characters from node names
1210 def rm_commas_etc(text
):
1214 pos
= re_commas_etc
.search(text
)
1217 result
= result
+ text
[:pos
]
1220 result
= result
+ text
1223 print 'Warning: nodename changhed to ' + `result`
1232 ## changeit: the actual routine, that changes the contents of the parsed
1236 def changeit(buf
, pp
):
1237 global onlylatexspecial
, hist
, out
1239 i
, length
= 0, len(pp
)
1241 # sanity check: length should always equal len(pp)
1242 if len(pp
) != length
:
1243 raise 'FATAL', 'inconsistent length. thought ' + `length`
+ ', but should really be ' + `
len(pp
)`
1249 if type(ch
) == type(''):
1250 #normally, only chunks are present in pp,
1251 # but in some cases, some extra info
1252 # has been inserted, e.g., the \end{...} clauses
1253 raise 'FATAL', 'got string, probably too many ' + `end`
1255 if ch
.chtype
== chunk_type(GROUP
):
1256 # check for {\em ...} constructs
1258 ch
.data
[0].chtype
== chunk_type(CSNAME
) and \
1259 s(buf
, ch
.data
[0].data
) in fontchanges
.keys():
1260 k
= s(buf
, ch
.data
[0].data
)
1262 pp
.insert(i
-1, chunk(CSNAME
, ch
.where
, fontchanges
[k
]))
1263 length
, i
= length
+1, i
+1
1265 # recursively parse the contents of the group
1266 changeit(buf
, ch
.data
)
1268 elif ch
.chtype
== chunk_type(IF
):
1270 flag
, negate
, data
= ch
.data
1271 ##print 'IF: flag, negate = ' + `flag, negate`
1272 if flag
not in flags
.keys():
1273 raise error
, 'unknown flag ' + `flag`
1279 length
, i
= length
-1, i
-1
1282 length
= length
+ len(data
)
1285 elif ch
.chtype
== chunk_type(ENV
):
1287 envname
, data
= ch
.data
1289 #push this environment name on stack
1290 hist
.inenv
.insert(0, envname
)
1292 #append an endenv chunk after grouped data
1293 data
.append(chunk(ENDENV
, ch
.where
, envname
))
1298 i
, length
= i
-1, length
-1
1302 length
= length
+ len(data
)
1304 if envname
== 'verbatim':
1305 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'example'), \
1306 chunk(GROUP
, ch
.where
, [])]
1307 length
, i
= length
+2, i
+2
1309 elif envname
== 'itemize':
1310 if hist
.itemizenesting
> len(itemizesymbols
):
1311 raise error
, 'too deep itemize nesting'
1312 ingroupch
= [chunk(CSNAME
, ch
.where
,\
1313 itemizesymbols
[hist
.itemizenesting
])]
1314 hist
.itemizenesting
= hist
.itemizenesting
+ 1
1315 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'itemize'),\
1316 chunk(GROUP
, ch
.where
, ingroupch
)]
1317 length
, i
= length
+2, i
+2
1319 elif envname
== 'enumerate':
1320 if hist
.enumeratenesting
> len(enumeratesymbols
):
1321 raise error
, 'too deep enumerate nesting'
1322 ingroupch
= [chunk(PLAIN
, ch
.where
,\
1323 enumeratesymbols
[hist
.enumeratenesting
])]
1324 hist
.enumeratenesting
= hist
.enumeratenesting
+ 1
1325 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'enumerate'),\
1326 chunk(GROUP
, ch
.where
, ingroupch
)]
1327 length
, i
= length
+2, i
+2
1329 elif envname
== 'description':
1330 ingroupch
= [chunk(CSNAME
, ch
.where
, 'b')]
1331 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'table'), \
1332 chunk(GROUP
, ch
.where
, ingroupch
)]
1333 length
, i
= length
+2, i
+2
1335 elif envname
== 'tableiii':
1339 #delete tabular format description
1341 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1343 length
= length
- (newi
-i
)
1345 newcode
.append(chunk(CSLINE
, wh
, 'table'))
1346 ingroupch
= [chunk(CSNAME
, wh
, 'asis')]
1347 newcode
.append(chunk(GROUP
, wh
, ingroupch
))
1348 newcode
.append(chunk(CSLINE
, wh
, 'item'))
1350 #get the name of macro for @item
1352 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1355 raise error
, 'Sorry, expected 1 chunk argument'
1356 if pp
[i
].chtype
!= chunk_type(PLAIN
):
1357 raise error
, 'Sorry, expected plain text argument'
1358 hist
.itemargmacro
= s(buf
, pp
[i
].data
)
1360 length
= length
- (newi
-i
)
1362 for count
in range(3):
1363 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1365 chunk(CSNAME
, wh
, 'emph'), \
1366 chunk(GROUP
, 0, pp
[i
:newi
])]
1368 length
= length
- (newi
-i
)
1372 itembody
= itembody
+ \
1373 [chunk(PLAIN
, wh
, ' --- ')] + \
1376 itembody
= emphgroup
1377 newcode
.append(chunk(GROUP
, wh
, itemarg
))
1378 newcode
= newcode
+ itembody
+ [chunk(DENDLINE
, wh
, '\n')]
1381 length
, i
= length
+l
, i
+l
1384 if length
!= len(pp
):
1385 raise 'STILL, SOMETHING wrong', `i`
1388 elif envname
== 'funcdesc':
1389 pp
.insert(i
, chunk(PLAIN
, ch
.where
, ''))
1390 i
, length
= i
+1, length
+1
1391 length
, i
= do_funcdesc(length
, buf
, pp
, i
)
1393 elif envname
== 'excdesc':
1394 pp
.insert(i
, chunk(PLAIN
, ch
.where
, ''))
1395 i
, length
= i
+1, length
+1
1396 length
, i
= do_excdesc(length
, buf
, pp
, i
)
1398 elif envname
== 'datadesc':
1399 pp
.insert(i
, chunk(PLAIN
, ch
.where
, ''))
1400 i
, length
= i
+1, length
+1
1401 length
, i
= do_datadesc(length
, buf
, pp
, i
)
1404 print 'WARNING: don\'t know what to do with env ' + `envname`
1406 elif ch
.chtype
== chunk_type(ENDENV
):
1408 if envname
!= hist
.inenv
[0]:
1409 raise error
, '\'end\' does not match. Name ' + `envname`
+ ', expected ' + `hist
.inenv
[0]`
1412 i
, length
= i
-1, length
-1
1414 if envname
== 'verbatim':
1416 chunk(CSLINE
, ch
.where
, 'end'), \
1417 chunk(GROUP
, ch
.where
, [\
1418 chunk(PLAIN
, ch
.where
, 'example')])]
1419 i
, length
= i
+2, length
+2
1420 elif envname
== 'itemize':
1421 hist
.itemizenesting
= hist
.itemizenesting
- 1
1423 chunk(CSLINE
, ch
.where
, 'end'), \
1424 chunk(GROUP
, ch
.where
, [\
1425 chunk(PLAIN
, ch
.where
, 'itemize')])]
1426 i
, length
= i
+2, length
+2
1427 elif envname
== 'enumerate':
1428 hist
.enumeratenesting
= hist
.enumeratenesting
-1
1430 chunk(CSLINE
, ch
.where
, 'end'), \
1431 chunk(GROUP
, ch
.where
, [\
1432 chunk(PLAIN
, ch
.where
, 'enumerate')])]
1433 i
, length
= i
+2, length
+2
1434 elif envname
== 'description':
1436 chunk(CSLINE
, ch
.where
, 'end'), \
1437 chunk(GROUP
, ch
.where
, [\
1438 chunk(PLAIN
, ch
.where
, 'table')])]
1439 i
, length
= i
+2, length
+2
1440 elif envname
== 'tableiii':
1442 chunk(CSLINE
, ch
.where
, 'end'), \
1443 chunk(GROUP
, ch
.where
, [\
1444 chunk(PLAIN
, ch
.where
, 'table')])]
1445 i
, length
= i
+2, length
+ 2
1446 pp
.insert(i
, chunk(DENDLINE
, ch
.where
, '\n'))
1447 i
, length
= i
+1, length
+1
1449 elif envname
in ('funcdesc', 'excdesc', 'datadesc'):
1451 chunk(CSLINE
, ch
.where
, 'end'), \
1452 chunk(GROUP
, ch
.where
, [\
1453 chunk(PLAIN
, ch
.where
, hist
.command
)])]
1454 i
, length
= i
+2, length
+2
1456 print 'WARNING: ending env ' + `envname`
+ 'has no actions'
1458 elif ch
.chtype
== chunk_type(CSNAME
):
1459 # control name transformations
1460 if s(buf
, ch
.data
) in ignoredcommands
:
1462 i
, length
= i
-1, length
-1
1463 elif s(buf
, ch
.data
) == '@' and \
1465 pp
[i
].chtype
== chunk_type(PLAIN
) and \
1466 s(buf
, pp
[i
].data
)[0] == '.':
1471 elif s(buf
, ch
.data
) == '\\':
1474 elif len(s(buf
, ch
.data
)) == 1 and \
1475 s(buf
, ch
.data
) in onlylatexspecial
:
1476 ch
.chtype
= chunk_type(PLAIN
)
1477 # check if such a command is followed by
1478 # an empty group: e.g., `\%{}'. If so, remove
1479 # this empty group too
1481 pp
[i
].chtype
== chunk_type(GROUP
) \
1482 and len(pp
[i
].data
) == 0:
1486 elif hist
.inargs
and s(buf
, ch
.data
) in inargsselves
:
1487 # This is the special processing of the
1488 # arguments of the \begin{funcdesc}... or
1489 # \funcline... arguments
1490 # \, --> , \[ --> [, \] --> ]
1491 ch
.chtype
= chunk_type(PLAIN
)
1493 elif s(buf
, ch
.data
) == 'renewcommand':
1494 # \renewcommand{\indexsubitem}....
1495 i
, length
= i
-1, length
-1
1497 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1500 and pp
[i
].chtype
== chunk_type(CSNAME
) \
1501 and s(buf
, pp
[i
].data
) == 'indexsubitem':
1503 length
= length
- (newi
-i
)
1504 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1505 text
= flattext(buf
, pp
[i
:newi
])
1506 if text
[0] != '(' or text
[-1] != ')':
1507 raise error
, 'expected indexsubitme enclosed in braces'
1508 words
= string
.split(text
[1:-1])
1509 hist
.indexsubitem
= words
1512 print 'WARNING: renewcommand with unsupported arg removed'
1514 length
= length
- (newi
-i
)
1516 elif s(buf
, ch
.data
) == 'item':
1517 ch
.chtype
= chunk_type(CSLINE
)
1518 length
, newi
= getoptarg(length
, buf
, pp
, i
)
1519 ingroupch
= pp
[i
:newi
]
1521 length
= length
- (newi
-i
)
1522 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1523 i
, length
= i
+1, length
+1
1525 elif s(buf
, ch
.data
) == 'ttindex':
1526 idxsi
= hist
.indexsubitem
1529 if len(idxsi
) >= 2 and idxsi
[1] in \
1530 ('method', 'function'):
1532 elif len(idxsi
) >= 2 and idxsi
[1] in \
1533 ('exception', 'object'):
1536 print 'WARNING: can\'t categorize ' + `idxsi`
+ ' for \'ttindex\' command'
1540 cat_class
= '('+string
.join(idxsi
)+')'
1542 ch
.chtype
= chunk_type(CSLINE
)
1545 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1548 length
= length
- (newi
-i
)
1550 cat_arg
= [chunk(PLAIN
, ch
.where
, cat_class
)]
1552 # determine what should be set in roman, and
1554 if command
in regindices
:
1556 arg
= [chunk(CSNAME
, ch
.where
, 't'), \
1557 chunk(GROUP
, ch
.where
, arg
)]
1559 cat_arg
= [chunk(CSNAME
, ch
.where
, 'r'), \
1560 chunk(GROUP
, ch
.where
, cat_arg
)]
1563 [chunk(PLAIN
, ch
.where
, ' ')] + \
1566 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1567 length
, i
= length
+1, i
+1
1570 elif s(buf
, ch
.data
) == 'ldots':
1571 # \ldots --> \dots{} --> @dots{}
1574 or pp
[i
].chtype
!= chunk_type(GROUP
) \
1575 or pp
[i
].data
!= []:
1576 pp
.insert(i
, chunk(GROUP
, ch
.where
, []))
1577 i
, length
= i
+1, length
+1
1578 elif s(buf
, ch
.data
) in wordsselves
:
1580 ch
.chtype
= chunk_type(PLAIN
)
1582 and pp
[i
].chtype
== chunk_type(GROUP
) \
1583 and pp
[i
].data
== []:
1586 elif s(buf
, ch
.data
) in for_texi
:
1589 elif s(buf
, ch
.data
) == 'e':
1592 ch
.chtype
= chunk_type(PLAIN
)
1593 elif s(buf
, ch
.data
) == 'lineiii':
1594 # This is the most tricky one
1595 # \lineiii{a1}{a2}[{a3}] -->
1596 # @item @<cts. of itemargmacro>{a1}
1599 ##print 'LINEIIIIII!!!!!!!'
1600 ## wobj = Wobj().init()
1601 ## dumpit(buf, wobj.write, pp[i-1:i+5])
1602 ## print '--->' + wobj.data + '<----'
1605 'no environment for lineiii'
1606 if hist
.inenv
[0] != 'tableiii':
1608 'wrong command (' + \
1610 ') in wrong environment (' \
1611 + `hist
.inenv
[0]`
+ ')'
1612 ch
.chtype
= chunk_type(CSLINE
)
1614 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1615 ingroupch
= [chunk(CSNAME
, 0, \
1616 hist
.itemargmacro
), \
1617 chunk(GROUP
, 0, pp
[i
:newi
])]
1619 length
= length
- (newi
-i
)
1620 ## print 'ITEM ARG: --->',
1621 ## wobj = Wobj().init()
1622 ## dumpit(buf, wobj.write, ingroupch)
1623 ## print wobj.data, '<---'
1624 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1626 i
, length
= i
+1, length
+1
1627 length
, i
= getnextarg(length
, buf
, pp
, i
)
1628 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1631 pp
.insert(i
, chunk(PLAIN
, ch
.where
, ' --- '))
1634 ## pp[grouppos].data = pp[grouppos].data \
1635 ## + [chunk(PLAIN, ch.where, ' ')] \
1638 ## length = length - (newi-i)
1639 if length
!= len(pp
):
1640 raise 'IN LINEIII IS THE ERR', `i`
1642 elif s(buf
, ch
.data
) in ('chapter', 'section', 'subsection', 'subsubsection'):
1643 #\xxxsection{A} ---->
1646 ## also: remove commas and quotes
1647 ch
.chtype
= chunk_type(CSLINE
)
1648 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1649 afternodenamecmd
= next_command_p(length
, buf
, pp
, newi
, 'nodename')
1650 if afternodenamecmd
< 0:
1651 cp1
= crcopy(pp
[i
:newi
])
1653 chunk(GROUP
, ch
.where
, \
1655 length
, newi
= length
- (newi
-i
) + 1, \
1657 text
= flattext(buf
, cp1
)
1658 text
= invent_node_names(text
)
1660 length
, endarg
= getnextarg(length
, buf
, pp
, afternodenamecmd
)
1661 cp1
= crcopy(pp
[afternodenamecmd
:endarg
])
1663 length
= length
- (endarg
-newi
)
1666 chunk(GROUP
, ch
.where
, \
1668 length
, newi
= length
- (newi
-i
) + 1, \
1670 text
= flattext(buf
, cp1
)
1673 ## print 'FLATTEXT:', `text`
1674 if text
in hist
.nodenames
:
1675 print 'WARNING: node name ' + `text`
+ ' already used'
1676 out
.doublenodes
.append(text
)
1678 hist
.nodenames
.append(text
)
1679 text
= rm_commas_etc(text
)
1681 chunk(CSLINE
, ch
.where
, 'node'), \
1682 chunk(GROUP
, ch
.where
, [\
1683 chunk(PLAIN
, ch
.where
, text
+', , ,')\
1685 i
, length
= newi
+2, length
+2
1687 elif s(buf
,ch
.data
) == 'funcline':
1688 # fold it to a very short environment
1690 chunk(CSLINE
, ch
.where
, 'end'), \
1691 chunk(GROUP
, ch
.where
, [\
1692 chunk(PLAIN
, ch
.where
, hist
.command
)])]
1693 i
, length
= i
+2, length
+2
1694 length
, i
= do_funcdesc(length
, buf
, pp
, i
)
1696 elif s(buf
,ch
.data
) == 'dataline':
1698 chunk(CSLINE
, ch
.where
, 'end'), \
1699 chunk(GROUP
, ch
.where
, [\
1700 chunk(PLAIN
, ch
.where
, hist
.command
)])]
1701 i
, length
= i
+2, length
+2
1702 length
, i
= do_datadesc(length
, buf
, pp
, i
)
1704 elif s(buf
,ch
.data
) == 'excline':
1706 chunk(CSLINE
, ch
.where
, 'end'), \
1707 chunk(GROUP
, ch
.where
, [\
1708 chunk(PLAIN
, ch
.where
, hist
.command
)])]
1709 i
, length
= i
+2, length
+2
1710 length
, i
= do_excdesc(length
, buf
, pp
, i
)
1713 elif s(buf
, ch
.data
) == 'index':
1716 ch
.chtype
= chunk_type(CSLINE
)
1718 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1720 ingroupch
= pp
[i
:newi
]
1722 length
= length
- (newi
-i
)
1723 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1724 length
, i
= length
+1, i
+1
1726 elif s(buf
, ch
.data
) == 'bifuncindex':
1727 ch
.chtype
= chunk_type(CSLINE
)
1729 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1730 ingroupch
= pp
[i
:newi
]
1732 length
= length
- (newi
-i
)
1734 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1735 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1736 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1737 chunk(PLAIN
, ch
.where
, \
1738 '(built-in function)')]))
1740 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1741 length
, i
= length
+1, i
+1
1744 elif s(buf
, ch
.data
) == 'opindex':
1745 ch
.chtype
= chunk_type(CSLINE
)
1747 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1748 ingroupch
= pp
[i
:newi
]
1750 length
= length
- (newi
-i
)
1752 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1753 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1754 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1755 chunk(PLAIN
, ch
.where
, \
1758 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1759 length
, i
= length
+1, i
+1
1762 elif s(buf
, ch
.data
) == 'bimodindex':
1763 ch
.chtype
= chunk_type(CSLINE
)
1765 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1766 ingroupch
= pp
[i
:newi
]
1768 length
= length
- (newi
-i
)
1770 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1771 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1772 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1773 chunk(PLAIN
, ch
.where
, \
1776 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1777 length
, i
= length
+1, i
+1
1779 elif s(buf
, ch
.data
) == 'sectcode':
1783 elif s(buf
, ch
.data
) == 'stmodindex':
1784 ch
.chtype
= chunk_type(CSLINE
)
1785 # use the program index as module index
1787 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1788 ingroupch
= pp
[i
:newi
]
1790 length
= length
- (newi
-i
)
1792 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1793 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1794 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1795 chunk(PLAIN
, ch
.where
, \
1798 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1799 length
, i
= length
+1, i
+1
1802 elif s(buf
, ch
.data
) == 'stindex':
1803 # XXX must actually go to newindex st
1805 ch
.chtype
= chunk_type(CSLINE
)
1807 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1808 ingroupch
= [chunk(CSNAME
, wh
, 'code'), \
1809 chunk(GROUP
, wh
, pp
[i
:newi
])]
1812 length
= length
- (newi
-i
)
1815 t
.append(chunk(PLAIN
, wh
, ' statement'))
1817 pp
.insert(i
, chunk(GROUP
, wh
, t
))
1818 i
, length
= i
+1, length
+1
1820 pp
.insert(i
, chunk(CSLINE
, wh
, 'cindex'))
1821 i
, length
= i
+1, length
+1
1824 t
.insert(0, chunk(PLAIN
, wh
, 'statement, '))
1826 pp
.insert(i
, chunk(GROUP
, wh
, t
))
1827 i
, length
= i
+1, length
+1
1830 elif s(buf
, ch
.data
) == 'indexii':
1831 #\indexii{A}{B} --->
1834 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1836 cp21
= crcopy(pp
[i
:newi
])
1838 length
= length
- (newi
-i
)
1839 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1841 cp22
= crcopy(pp
[i
:newi
])
1843 length
= length
- (newi
-i
)
1845 ch
.chtype
= chunk_type(CSLINE
)
1847 pp
.insert(i
, chunk(GROUP
, ch
.where
, cp11
+ [\
1848 chunk(PLAIN
, ch
.where
, ' ')] + cp12
))
1849 i
, length
= i
+1, length
+1
1850 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'cindex'), \
1851 chunk(GROUP
, ch
.where
, cp22
+ [\
1852 chunk(PLAIN
, ch
.where
, ', ')]+ cp21
)]
1853 i
, length
= i
+2, length
+2
1855 elif s(buf
, ch
.data
) == 'indexiii':
1856 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1858 cp21
= crcopy(pp
[i
:newi
])
1859 cp31
= crcopy(pp
[i
:newi
])
1861 length
= length
- (newi
-i
)
1862 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1864 cp22
= crcopy(pp
[i
:newi
])
1865 cp32
= crcopy(pp
[i
:newi
])
1867 length
= length
- (newi
-i
)
1868 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1870 cp23
= crcopy(pp
[i
:newi
])
1871 cp33
= crcopy(pp
[i
:newi
])
1873 length
= length
- (newi
-i
)
1875 ch
.chtype
= chunk_type(CSLINE
)
1877 pp
.insert(i
, chunk(GROUP
, ch
.where
, cp11
+ [\
1878 chunk(PLAIN
, ch
.where
, ' ')] + cp12 \
1879 + [chunk(PLAIN
, ch
.where
, ' ')] \
1881 i
, length
= i
+1, length
+1
1882 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'cindex'), \
1883 chunk(GROUP
, ch
.where
, cp22
+ [\
1884 chunk(PLAIN
, ch
.where
, ' ')]+ cp23\
1885 + [chunk(PLAIN
, ch
.where
, ', ')] +\
1887 i
, length
= i
+2, length
+2
1888 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'cindex'), \
1889 chunk(GROUP
, ch
.where
, cp33
+ [\
1890 chunk(PLAIN
, ch
.where
, ', ')]+ cp31\
1891 + [chunk(PLAIN
, ch
.where
, ' ')] +\
1893 i
, length
= i
+2, length
+2
1896 elif s(buf
, ch
.data
) == 'indexiv':
1897 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1899 cp21
= crcopy(pp
[i
:newi
])
1900 cp31
= crcopy(pp
[i
:newi
])
1901 cp41
= crcopy(pp
[i
:newi
])
1903 length
= length
- (newi
-i
)
1904 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1906 cp22
= crcopy(pp
[i
:newi
])
1907 cp32
= crcopy(pp
[i
:newi
])
1908 cp42
= crcopy(pp
[i
:newi
])
1910 length
= length
- (newi
-i
)
1911 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1913 cp23
= crcopy(pp
[i
:newi
])
1914 cp33
= crcopy(pp
[i
:newi
])
1915 cp43
= crcopy(pp
[i
:newi
])
1917 length
= length
- (newi
-i
)
1918 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1920 cp24
= crcopy(pp
[i
:newi
])
1921 cp34
= crcopy(pp
[i
:newi
])
1922 cp44
= crcopy(pp
[i
:newi
])
1924 length
= length
- (newi
-i
)
1926 ch
.chtype
= chunk_type(CSLINE
)
1928 ingroupch
= cp11
+ \
1932 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1933 i
, length
= i
+1, length
+1
1934 ingroupch
= cp22
+ \
1938 pp
[i
:i
] = cindexch
+ [\
1939 chunk(GROUP
, ch
.where
, ingroupch
)]
1940 i
, length
= i
+2, length
+2
1941 ingroupch
= cp33
+ \
1945 pp
[i
:i
] = cindexch
+ [\
1946 chunk(GROUP
, ch
.where
, ingroupch
)]
1947 i
, length
= i
+2, length
+2
1948 ingroupch
= cp44
+ \
1952 pp
[i
:i
] = cindexch
+ [\
1953 chunk(GROUP
, ch
.where
, ingroupch
)]
1954 i
, length
= i
+2, length
+2
1959 print 'don\'t know what to do with keyword ' + `
s(buf
, ch
.data
)`
# Matches '@', '{' or '}'.  dumpit searches plain text with this pattern and
# writes an extra '@' in front of the character that was found.
1963 re_atsign
= regex
.compile('[@{}]')
# Matches a newline.  dumpit searches the text handled in its CSLINE branch
# with this pattern and prints a warning when one is found.
1964 re_newline
= regex
.compile('\n')
# Write the chunk list pp out through wm, a callable that is handed strings
# (other code in this file passes outf.write; the CSLINE branch below passes
# wobj.write).  buf is handed to s() together with a chunk's data to obtain
# that chunk's text.
# NOTE(review): the embedded line numbers skip and indentation has been lost,
# so some statements of this function are absent from this copy and the
# nesting of the visible ones cannot be read off.  The comments below
# describe only what is visible.
1966 def dumpit(buf
, wm
, pp
):
# i is a position in pp, length the number of chunks.  Below, pp[i] is used
# as the chunk after ch and pp[i-2] as the chunk before it, so i has
# presumably been moved past ch already (that statement is not visible).
1970 i
, length
= 0, len(pp
)
# sanity check: the remembered length must still equal len(pp)
1975 if len(pp
) != length
:
1976 raise 'FATAL', 'inconsistent length'
# CSNAME: write the control sequence name with '@' in front of it
1988 if ch
.chtype
== chunk_type(CSNAME
):
1989 wm('@' + s(buf
, ch
.data
))
# for a 'node' name followed by plain text that is listed in
# out.doublenodes, ' ZZZ-' plus zfill(`i`, 4) is written as well
1990 if s(buf
, ch
.data
) == 'node' and \
1991 pp
[i
].chtype
== chunk_type(PLAIN
) and \
1992 s(buf
, pp
[i
].data
) in out
.doublenodes
:
1993 ##XXX doesnt work yet??
1994 wm(' ZZZ-' + zfill(`i`
, 4))
# tests whether the name starts with a letter (what happens then is not
# visible here)
1995 if s(buf
, ch
.data
)[0] in string
.letters
:
# PLAIN: plain text.  The dospace test excludes text that is exactly one
# space or one tab (its consequence is not visible here).
1997 elif ch
.chtype
== chunk_type(PLAIN
):
1998 if dospace
and s(buf
, ch
.data
) not in (' ', '\t'):
2000 text
= s(buf
, ch
.data
)
# find the next '@', '{' or '}' and write the text before it, an '@', and
# the character itself
2002 pos
= re_atsign
.search(text
)
2005 wm(text
[:pos
] + '@' + text
[pos
])
# GROUP: recurse into the chunk list stored in ch.data
2008 elif ch
.chtype
== chunk_type(GROUP
):
2010 dumpit(buf
, wm
, ch
.data
)
# DENDLINE: the loop runs while the chunk at pp[i] is a DENDLINE or an
# ENDLINE (the loop body is not visible here)
2012 elif ch
.chtype
== chunk_type(DENDLINE
):
2014 while i
!= length
and pp
[i
].chtype
in \
2015 (chunk_type(DENDLINE
), chunk_type(ENDLINE
)):
# OTHER, ACTIVE, ENDLINE: the bodies of these branches are not visible here
2017 elif ch
.chtype
== chunk_type(OTHER
):
2019 elif ch
.chtype
== chunk_type(ACTIVE
):
2021 elif ch
.chtype
== chunk_type(ENDLINE
):
# CSLINE: the first test holds when the chunk before ch is neither an
# ENDLINE/DENDLINE nor plain text ending in '\n' (the action taken in that
# case is not visible here)
2023 elif ch
.chtype
== chunk_type(CSLINE
):
2024 if i
>= 2 and pp
[i
-2].chtype
not in \
2025 (chunk_type(ENDLINE
), chunk_type(DENDLINE
)) \
2026 and (pp
[i
-2].chtype
!= chunk_type(PLAIN
) \
2027 or s(buf
, pp
[i
-2].data
)[-1] != '\n'):
# write the command name with '@' in front of it
2030 wm('@' + s(buf
, ch
.data
))
# (the condition guarding this raise is not visible here)
2032 raise error
, 'CSLINE expected another chunk'
# the chunk at pp[i] has to be a GROUP whose data is a list
2033 if pp
[i
].chtype
!= chunk_type(GROUP
):
2034 raise error
, 'CSLINE expected GROUP'
2035 if type(pp
[i
].data
) != type([]):
2036 raise error
, 'GROUP chould contain []-data'
# dump the group's chunks through the write method of a fresh Wobj instead
# of through wm
2038 wobj
= Wobj().init()
2039 dumpit(buf
, wobj
.write
, pp
[i
].data
)
# look for a newline in text (its assignment is not visible here); the
# warning and the write of text[:pos] followed by a space belong to this
# search
2046 pos
= re_newline
.search(text
)
2049 print 'WARNING: found newline in csline arg'
2050 wm(text
[:pos
] + ' ')
2054 pp
[i
].chtype
not in (chunk_type(CSLINE
), \
2055 chunk_type(ENDLINE
), chunk_type(DENDLINE
)) \
2056 and (pp
[i
].chtype
!= chunk_type(PLAIN
) \
2057 or s(buf
, pp
[i
].data
)[0] != '\n'):
# COMMENT: when the chunk before ch is plain text, that text is printed as
# debugging output
2060 elif ch
.chtype
== chunk_type(COMMENT
):
2061 ## print 'COMMENT: previous chunk =', pp[i-2]
2062 if pp
[i
-2].chtype
== chunk_type(PLAIN
):
2063 print 'PLAINTEXT =', `
s(buf
, pp
[i
-2].data
)`
# holds when the comment text is non-empty and regex.match of '^[ \t]*$'
# against it gives a negative result
2064 if s(buf
, ch
.data
) and \
2065 regex
.match('^[ \t]*$', s(buf
, ch
.data
)) < 0:
# holds when the chunk before ch is neither an ENDLINE/DENDLINE nor plain
# text for which the trailing-newline pattern gives a non-negative result
2066 if i
>= 2 and pp
[i
-2].chtype
not in \
2067 (chunk_type(ENDLINE
), chunk_type(DENDLINE
)) \
2068 and not (pp
[i
-2].chtype
== chunk_type(PLAIN
) \
2069 and regex
.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf
, pp
[i
-2].data
)) >= 0):
2070 print 'ADDING NEWLINE'
# write the comment text with '@c ' in front of it
2072 wm('@c ' + s(buf
, ch
.data
))
# IGNORE: the body of this branch is not visible here
2073 elif ch
.chtype
== chunk_type(IGNORE
):
# build the backquoted (repr) form of the chunk text, shorten it to its
# first 400 characters plus '...' (the guarding condition is not visible
# here), and print a warning naming the chunk type as 'not handled'
2077 str = `
s(buf
, ch
.data
)`
2081 str = str[:400] + '...'
2082 print 'warning:', ch
.chtype
, 'not handled, data ' + str
# NOTE(review): the statements below look like the body of the command-line
# entry point; its header line and a number of other lines (the embedded line
# numbers skip) are absent from this copy, and indentation has been lost.
# Only the visible statements are described.
# default names of the files whose contents are written before and after the
# converted text
2088 headerfile
= 'texipre.dat'
2089 trailerfile
= 'texipost.dat'
# parse the command line; -o, -h and -t each take a value
2092 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'o:h:t:')
2093 except getopt
.error
:
2097 print 'usage: partparse [-o outfile] [-h headerfile]',
2098 print '[-t trailerfile] file ...'
# -o sets the output file, -h the header file, -t the trailer file
2101 for opt
, arg
in opts
:
2102 if opt
== '-o': outfile
= arg
2103 if opt
== '-h': headerfile
= arg
2104 if opt
== '-t': trailerfile
= arg
# output name built from the first argument: its root plus '.texi'
# (the condition guarding this is not visible here)
2107 root
, ext
= os
.path
.splitext(args
[0])
2108 outfile
= root
+ '.texi'
2111 print 'will not overwrite input file', outfile
# open the output file and start it with the contents of the header file
2114 outf
= open(outfile
, 'w')
2115 outf
.write(open(headerfile
, 'r').read())
# with more than one argument, print a banner naming the current file
2118 if len(args
) > 1: print '='*20, file, '='*20
# read the whole input file and hand it to parseit, which returns a pair
# whose second item is passed to dumpit as the chunk list
2119 buf
= open(file, 'r').read()
2120 w
, pp
= parseit(buf
)
# write the chunks to the output file
2123 dumpit(buf
, outf
.write
, pp
)
# finish the output with the contents of the trailer file
2125 outf
.write(open(trailerfile
, 'r').read())