2 # partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
3 # and generate texinfo source.
5 # This is *not* a good example of good programming practices. In fact, this
6 # file could use a complete rewrite, in order to become faster, more
7 # easily extensible and maintainable.
9 # However, I added some comments in a few places for the pitiful person who
10 # would ever need to take a look into this file.
12 # Have I been clear enough??
17 import sys
, string
, regex
, getopt
, os
19 # Different parse modes for phase 1
28 the_modes
= MODE_REGULAR
, MODE_VERBATIM
, MODE_CS_SCAN
, MODE_COMMENT
, \
29 MODE_MATH
, MODE_DMATH
, MODE_GOBBLEWHITE
31 # Show the neighbourhood of the scanned buffer
32 def epsilon(buf
, where
):
33 wmt
, wpt
= where
- 10, where
+ 10
38 return ' Context ' + `buf
[wmt
:where
]`
+ '.' + `buf
[where
:wpt
]`
+ '.'
40 # Should return the line number. never worked
43 return ' Line ' + `lineno`
+ '.'
45 # Displays the recursion level.
47 return ' Level ' + `lvl`
+ '.'
49 # Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    # Build the diagnostic suffix that is appended to error messages.
    #
    # The three pieces are produced by lv(lvl), lin() and
    # epsilon(buf, where), in that order, and concatenated unchanged.
    level_part = lv(lvl)
    line_part = lin()
    context_part = epsilon(buf, where)
    return level_part + line_part + context_part
54 # This class is only needed for _symbolic_ representation of the parse mode.
57 if arg
not in the_modes
:
58 raise ValueError, 'mode not in the_modes'
62 def __cmp__(self
, other
):
63 if type(self
) != type(other
):
65 return cmp(self
.mode
, other
.mode
)
68 if self
.mode
== MODE_REGULAR
:
70 elif self
.mode
== MODE_VERBATIM
:
71 return 'MODE_VERBATIM'
72 elif self
.mode
== MODE_CS_SCAN
:
74 elif self
.mode
== MODE_COMMENT
:
76 elif self
.mode
== MODE_MATH
:
78 elif self
.mode
== MODE_DMATH
:
80 elif self
.mode
== MODE_GOBBLEWHITE
:
81 return 'MODE_GOBBLEWHITE'
83 raise ValueError, 'mode not in the_modes'
85 # just a wrapper around a class initialisation
87 return Mode().init(arg
)
90 # After phase 1, the text consists of chunks, with a certain type
91 # this type will be assigned to the chtype member of the chunk
92 # the where-field contains the file position where this is found
93 # and the data field contains (1): a tuple describing start and end
94 # positions of the substring (can be used as slice for the buf-variable),
95 # (2) just a string, mostly generated by the changeit routine,
96 # or (3) a list, describing a (recursive) subgroup of chunks
# Numeric chunk type codes.  The trailing comment on each code describes
# what the chunk's data field holds for that type.
PLAIN = 0
# assumed plain text, data = the text
GROUP = 1
# group ({}), data = [chunk, chunk,..]
CSNAME = 2
# control sequence token, data = the command
COMMENT = 3
# data is the actual comment
DMATH = 4
# display math, data = [chunk, chunk,..]
MATH = 5
# math, see display math
OTHER = 6
# char with catcode other, data = char
ACTIVE = 7
# active char
GOBBLEDWHITE = 8
# gobbled LWSP, after CSNAME
ENDLINE = 9
# end-of-line, data = '\n'
DENDLINE = 10
# double EOL, data = '\n', indicates \par
ENV = 11
# LaTeX environment, data = (envname, [ch, ch, ch, ...])
CSLINE = 12
# for texi: the next chunk will be one group of args,
# which will all be set on one line
IGNORE = 13
# ignore this data
ENDENV = 14
# temporary end-of-group indicator
IF = 15
# if-directive, data = (flag, negate, [ch, ch, ch, ...])

# Every valid chunk type code, in numeric order.
the_types = (
    PLAIN, GROUP, CSNAME, COMMENT,
    DMATH, MATH, OTHER, ACTIVE,
    GOBBLEDWHITE, ENDLINE, DENDLINE, ENV,
    CSLINE, IGNORE, ENDENV, IF
)
119 # class, just to display symbolic name
121 def init(self
, chunk_type
):
122 if chunk_type
not in the_types
:
123 raise 'ValueError', 'chunk_type not in the_types'
124 self
.chunk_type
= chunk_type
def __cmp__(self, other):
    # Comparison hook: order two chunk types by their numeric code.
    #
    # An operand whose type differs from self's is first wrapped with
    # chunk_type(), so that its chunk_type attribute can be read below.
    same_kind = type(self) == type(other)
    if not same_kind:
        other = chunk_type(other)
    mine = self.chunk_type
    theirs = other.chunk_type
    return cmp(mine, theirs)
133 if self
.chunk_type
== PLAIN
:
135 elif self
.chunk_type
== GROUP
:
137 elif self
.chunk_type
== CSNAME
:
139 elif self
.chunk_type
== COMMENT
:
141 elif self
.chunk_type
== DMATH
:
143 elif self
.chunk_type
== MATH
:
145 elif self
.chunk_type
== OTHER
:
147 elif self
.chunk_type
== ACTIVE
:
149 elif self
.chunk_type
== GOBBLEDWHITE
:
150 return 'GOBBLEDWHITE'
151 elif self
.chunk_type
== DENDLINE
:
153 elif self
.chunk_type
== ENDLINE
:
155 elif self
.chunk_type
== ENV
:
157 elif self
.chunk_type
== CSLINE
:
159 elif self
.chunk_type
== IGNORE
:
161 elif self
.chunk_type
== ENDENV
:
163 elif self
.chunk_type
== IF
:
166 raise ValueError, 'chunk_type not in the_types'
def chunk_type(type):
    # Convenience constructor: create a ChunkType instance, pass the given
    # code to its init() method and hand back whatever init() returns.
    instance = ChunkType()
    return instance.init(type)
172 # store a type object of the ChunkType-class-instance...
173 chunk_type_type
= type(chunk_type(0))
175 # this class contains a part of the parsed buffer
177 def init(self
, chtype
, where
, data
):
178 if type(chtype
) != chunk_type_type
:
179 chtype
= chunk_type(chtype
)
181 if type(where
) != type(0):
182 raise TypeError, '\'where\' is not a number'
185 ##print 'CHUNK', self
189 return 'chunk' + `self
.chtype
, self
.where
, self
.data`
def chunk(chtype, where, data):
    # Convenience constructor: create a Chunk instance, pass the type,
    # position and data to its init() method and hand back whatever
    # init() returns.
    instance = Chunk()
    return instance.init(chtype, where, data)
197 error
= 'partparse.error'
239 # Show a list of catcode-name-symbols
243 result
= result
+ cc_names
[i
] + ', '
244 return '[' + result
[:-2] + ']'
246 # the name of the catcode (ACTIVE, OTHER, etc.)
248 return cc_names
[code
]
251 # Which catcodes make the parser stop parsing regular plaintext
252 regular_stopcodes
= [CC_ESCAPE
, CC_LBRACE
, CC_RBRACE
, CC_MATHSHIFT
, \
253 CC_ALIGNMENT
, CC_PARAMETER
, CC_SUPERSCRIPT
, CC_SUBSCRIPT
, \
254 CC_IGNORE
, CC_ACTIVE
, CC_COMMENT
, CC_INVALID
, CC_ENDLINE
]
256 # same for scanning a control sequence name
257 csname_scancodes
= [CC_LETTER
]
259 # same for gobbling LWSP
260 white_scancodes
= [CC_WHITE
]
261 ##white_scancodes = [CC_WHITE, CC_ENDLINE]
263 # make a list of all catcode id's, except for catcode ``other''
264 all_but_other_codes
= range(16)
265 del all_but_other_codes
[CC_OTHER
]
266 ##print all_but_other_codes
268 # when does a comment end
269 comment_stopcodes
= [CC_ENDLINE
]
271 # gather all characters together, specified by a list of catcodes
272 def code2string(cc
, codelist
):
273 ##print 'code2string: codelist = ' + pcl(codelist),
275 for category
in codelist
:
277 result
= result
+ cc
[category
]
278 ##print 'result = ' + `result`
281 # automatically generate all characters of catcode other, being the
282 # complement set in the ASCII range (128 characters)
283 def make_other_codes(cc
):
284 otherchars
= range(256) # could be made 256, no problem
285 for category
in all_but_other_codes
:
287 for c
in cc
[category
]:
288 otherchars
[ord(c
)] = None
292 result
= result
+ chr(i
)
295 # catcode dump (which characters have which catcodes).
296 def dump_cc(name
, cc
):
298 ##print '=' * (8+len(name))
300 raise TypeError, 'cc not good cat class'
301 ## for i in range(16):
302 ## print pc(i) + '\t' + `cc[i]`
305 # In the beginning,....
306 epoch_cc
= [None] * 16
307 ##dump_cc('epoch_cc', epoch_cc)
311 initex_cc
= epoch_cc
[:]
312 initex_cc
[CC_ESCAPE
] = '\\'
313 initex_cc
[CC_ENDLINE
], initex_cc
[CC_IGNORE
], initex_cc
[CC_WHITE
] = \
315 initex_cc
[CC_LETTER
] = string
.uppercase
+ string
.lowercase
316 initex_cc
[CC_COMMENT
], initex_cc
[CC_INVALID
] = '%', '\x7F'
317 #initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
318 ##dump_cc('initex_cc', initex_cc)
321 # LPLAIN: LaTeX catcode setting (see lplain.tex)
322 lplain_cc
= initex_cc
[:]
323 lplain_cc
[CC_LBRACE
], lplain_cc
[CC_RBRACE
] = '{', '}'
324 lplain_cc
[CC_MATHSHIFT
] = '$'
325 lplain_cc
[CC_ALIGNMENT
] = '&'
326 lplain_cc
[CC_PARAMETER
] = '#'
327 lplain_cc
[CC_SUPERSCRIPT
] = '^\x0B' # '^' and C-k
328 lplain_cc
[CC_SUBSCRIPT
] = '_\x01' # '_' and C-a
329 lplain_cc
[CC_WHITE
] = lplain_cc
[CC_WHITE
] + '\t'
330 lplain_cc
[CC_ACTIVE
] = '~\x0C' # '~' and C-l
331 lplain_cc
[CC_OTHER
] = make_other_codes(lplain_cc
)
332 ##dump_cc('lplain_cc', lplain_cc)
335 # Guido's LaTeX environment catcoded '_' as ``other''
336 # my own purpose catlist
338 my_cc
[CC_SUBSCRIPT
] = my_cc
[CC_SUBSCRIPT
][1:] # remove '_' here
339 my_cc
[CC_OTHER
] = my_cc
[CC_OTHER
] + '_' # add it to OTHER list
340 dump_cc('my_cc', my_cc
)
344 # needed for un_re, my equivalent for regexp-quote in Emacs
345 re_meaning
= '\\[]^$'
351 result
= result
+ '\\'
355 # NOTE the negate ('^') operator in *some* of the regexps below
def make_rc_regular(cc):
    # Compile a bracket expression that matches any single character
    # returned by code2string(cc, regular_stopcodes).
    #
    # The characters are inserted into the bracket expression unescaped:
    # problems here if '[]' are included!!
    stop_chars = code2string(cc, regular_stopcodes)
    pattern = '[' + stop_chars + ']'
    return regex.compile(pattern)
def make_rc_cs_scan(cc):
    # Compile a negated bracket expression: it matches any single character
    # that is NOT among those returned by code2string(cc, csname_scancodes).
    name_chars = code2string(cc, csname_scancodes)
    pattern = '[^' + name_chars + ']'
    return regex.compile(pattern)
def make_rc_comment(cc):
    # Compile a bracket expression that matches any single character
    # returned by code2string(cc, comment_stopcodes).
    stop_chars = code2string(cc, comment_stopcodes)
    pattern = '[' + stop_chars + ']'
    return regex.compile(pattern)
def make_rc_endwhite(cc):
    # Compile a negated bracket expression: it matches any single character
    # that is NOT among those returned by code2string(cc, white_scancodes).
    white_chars = code2string(cc, white_scancodes)
    pattern = '[^' + white_chars + ']'
    return regex.compile(pattern)
371 # regular: normal mode:
372 rc_regular
= make_rc_regular(my_cc
)
374 # scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
375 rc_cs_scan
= make_rc_cs_scan(my_cc
)
376 rc_comment
= make_rc_comment(my_cc
)
377 rc_endwhite
= make_rc_endwhite(my_cc
)
380 # parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
381 # RECURSION-LEVEL is incremented on entry.
382 # result contains the list of chunks returned
383 # together with this list, the buffer position is returned
385 # RECURSION-LEVEL will be set to zero *again*, when recursively a
386 # {,D}MATH-mode scan has been entered.
387 # This has been done in order to better check for environment-mismatches
389 def parseit(buf
, *rest
):
393 parsemode
, start
, lvl
= rest
395 parsemode
, start
, lvl
= rest
+ (0, )
397 parsemode
, start
, lvl
= rest
+ (0, 0)
399 parsemode
, start
, lvl
= mode(MODE_REGULAR
), 0, 0
401 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
404 if lvl
== 0 and parsemode
== mode(MODE_REGULAR
):
408 ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
411 # some of the more regular modes...
414 if parsemode
in (mode(MODE_REGULAR
), mode(MODE_DMATH
), mode(MODE_MATH
)):
420 #print '\tnew round: ' + epsilon(buf, where)
422 if lvl
> 1 or curpmode
!= mode(MODE_REGULAR
):
423 # not the way we started...
424 raise EOFError, 'premature end of file.' + lle(lvl
, buf
, where
)
425 # the real ending of lvl-1 parse
428 pos
= rc_regular
.search(buf
, where
)
434 newpos
, c
= pos
, chunk(PLAIN
, where
, (where
, pos
))
440 # ok, pos == where and pos != end
442 foundchar
= buf
[where
]
443 if foundchar
in my_cc
[CC_LBRACE
]:
444 # recursive subgroup parse...
445 newpos
, data
= parseit(buf
, curpmode
, where
+1, lvl
)
446 result
.append(chunk(GROUP
, where
, data
))
448 elif foundchar
in my_cc
[CC_RBRACE
]:
450 raise error
, 'ENDGROUP while in base level.' + lle(lvl
, buf
, where
)
451 if lvl
== 1 and mode
!= mode(MODE_REGULAR
):
452 raise error
, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
453 return where
+ 1, result
455 elif foundchar
in my_cc
[CC_ESCAPE
]:
457 # call the routine that actually deals with
458 # this problem. If do_ret is not None, then
459 # return the value of do_ret
461 # Note that handle_cs might call this routine
462 # recursively again...
464 do_ret
, newpos
= handlecs(buf
, where
, \
465 curpmode
, lvl
, result
, end
)
469 elif foundchar
in my_cc
[CC_COMMENT
]:
470 newpos
, data
= parseit(buf
, \
471 mode(MODE_COMMENT
), where
+1, lvl
)
472 result
.append(chunk(COMMENT
, where
, data
))
474 elif foundchar
in my_cc
[CC_MATHSHIFT
]:
475 # note that recursive calls to math-mode
476 # scanning are called with recursion-level 0
477 # again, in order to check for bad mathend
479 if where
+ 1 != end
and \
483 # double mathshift, e.g. '$$'
485 if curpmode
== mode(MODE_REGULAR
):
486 newpos
, data
= parseit(buf
, \
489 result
.append(chunk(DMATH
, \
491 elif curpmode
== mode(MODE_MATH
):
492 raise error
, 'wrong math delimiiter' + lin() + epsilon(buf
, where
)
494 raise error
, 'bad mathend.' + \
497 return where
+ 2, result
500 # single math shift, e.g. '$'
502 if curpmode
== mode(MODE_REGULAR
):
503 newpos
, data
= parseit(buf
, \
506 result
.append(chunk(MATH
, \
508 elif curpmode
== mode(MODE_DMATH
):
509 raise error
, 'wrong math delimiiter' + lin() + epsilon(buf
, where
)
511 raise error
, 'bad mathend.' + \
514 return where
+ 1, result
516 elif foundchar
in my_cc
[CC_IGNORE
]:
517 print 'warning: ignored char', `foundchar`
520 elif foundchar
in my_cc
[CC_ACTIVE
]:
521 result
.append(chunk(ACTIVE
, where
, foundchar
))
524 elif foundchar
in my_cc
[CC_INVALID
]:
525 raise error
, 'invalid char ' + `foundchar`
528 elif foundchar
in my_cc
[CC_ENDLINE
]:
530 # after an end of line, eat the rest of
531 # whitespace on the beginning of the next line
532 # this is what LaTeX more or less does
534 # also, try to indicate double newlines (\par)
538 newpos
, dummy
= parseit(buf
, mode(MODE_GOBBLEWHITE
), where
+ 1, lvl
)
539 if newpos
!= end
and buf
[newpos
] in \
541 result
.append(chunk(DENDLINE
, \
542 savedwhere
, foundchar
))
544 result
.append(chunk(ENDLINE
, \
545 savedwhere
, foundchar
))
547 result
.append(chunk(OTHER
, where
, foundchar
))
550 elif parsemode
== mode(MODE_CS_SCAN
):
552 # scan for a control sequence token. `\ape', `\nut' or `\%'
555 raise EOFError, 'can\'t find end of csname'
556 pos
= rc_cs_scan
.search(buf
, start
)
560 # first non-letter right where we started the search
561 # ---> the control sequence name consists of one single
562 # character. Also: don't eat white space...
563 if buf
[pos
] in my_cc
[CC_ENDLINE
]:
566 return pos
, (start
, pos
)
572 pos2
, dummy
= parseit(buf
, \
573 mode(MODE_GOBBLEWHITE
), spos
, lvl
)
574 return pos2
, (start
, pos
)
576 elif parsemode
== mode(MODE_GOBBLEWHITE
):
579 pos
= rc_endwhite
.search(buf
, start
)
582 return pos
, (start
, pos
)
584 elif parsemode
== mode(MODE_COMMENT
):
585 pos
= rc_comment
.search(buf
, start
)
588 print 'no newline perhaps?'
589 raise EOFError, 'can\'t find end of comment'
591 pos2
, dummy
= parseit(buf
, mode(MODE_GOBBLEWHITE
), pos
, lvl
)
592 return pos2
, (start
, pos
)
596 raise error
, 'Unknown mode (' + `parsemode`
+ ')'
599 #moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
601 #boxcommands = 'mbox', 'fbox'
602 #defcommands = 'def', 'newcommand'
604 endverbstr
= '\\end{verbatim}'
606 re_endverb
= regex
.compile(un_re(endverbstr
))
609 # handlecs: helper function for parseit, for the special thing we might
610 # wanna do after certain command control sequences
611 # returns: None or return_data, newpos
613 # in the latter case, the calling function is instructed to immediately
614 # return with the data in return_data
616 def handlecs(buf
, where
, curpmode
, lvl
, result
, end
):
619 # get the control sequence name...
620 newpos
, data
= parseit(buf
, mode(MODE_CS_SCAN
), where
+1, lvl
)
623 if s(buf
, data
) in ('begin', 'end'):
624 # skip the expected '{' and get the LaTeX-envname '}'
625 newpos
, data
= parseit(buf
, mode(MODE_REGULAR
), newpos
+1, lvl
)
627 raise error
, 'expected 1 chunk of data.' + \
630 # yucky, we've got an environment
631 envname
= s(buf
, data
[0].data
)
632 ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
633 if s(buf
, saveddata
) == 'begin' and envname
== 'verbatim':
634 # verbatim deserves special treatment
635 pos
= re_endverb
.search(buf
, newpos
)
637 raise error
, `endverbstr`
+ ' not found.' + lle(lvl
, buf
, where
)
638 result
.append(chunk(ENV
, where
, (envname
, [chunk(PLAIN
, newpos
, (newpos
, pos
))])))
639 newpos
= pos
+ len(endverbstr
)
641 elif s(buf
, saveddata
) == 'begin':
642 # start parsing recursively... If that parse returns
643 # from an '\end{...}', then should the last item of
644 # the returned data be a string containing the ended
646 newpos
, data
= parseit(buf
, curpmode
, newpos
, lvl
)
647 if not data
or type(data
[-1]) != type(''):
648 raise error
, 'missing \'end\'' + lle(lvl
, buf
, where
) + epsilon(buf
, newpos
)
651 if retenv
!= envname
:
652 #[`retenv`, `envname`]
653 raise error
, 'environments do not match.' + \
654 lle(lvl
, buf
, where
) + \
656 result
.append(chunk(ENV
, where
, (retenv
, data
)))
658 # 'end'... append the environment name, as just
659 # pointed out, and order parseit to return...
660 result
.append(envname
)
661 ##print 'POINT of return: ' + epsilon(buf, newpos)
662 # the tuple will be returned by parseit
663 return (newpos
, result
), newpos
665 # end of \begin ... \end handling
667 elif s(buf
, data
)[0:2] == 'if':
668 # another scary monster: the 'if' directive
669 flag
= s(buf
, data
)[2:]
671 # recursively call parseit, just like environment above..
672 # the last item of data should contain the if-termination
673 # e.g., 'else' or 'fi'
674 newpos
, data
= parseit(buf
, curpmode
, newpos
, lvl
)
675 if not data
or data
[-1] not in ('else', 'fi'):
676 raise error
, 'wrong if... termination' + \
677 lle(lvl
, buf
, where
) + epsilon(buf
, newpos
)
681 # 0 means dont_negate flag
682 result
.append(chunk(IF
, where
, (flag
, 0, data
)))
684 # do the whole thing again, there is only one way
685 # to end this one, by 'fi'
686 newpos
, data
= parseit(buf
, curpmode
, newpos
, lvl
)
687 if not data
or data
[-1] not in ('fi', ):
688 raise error
, 'wrong if...else... termination' \
689 + lle(lvl
, buf
, where
) \
690 + epsilon(buf
, newpos
)
694 result
.append(chunk(IF
, where
, (flag
, 1, data
)))
695 #done implicitely: return None, newpos
697 elif s(buf
, data
) in ('else', 'fi'):
698 result
.append(s(buf
, data
))
699 # order calling party to return tuple
700 return (newpos
, result
), newpos
702 # end of \if, \else, ... \fi handling
704 elif s(buf
, saveddata
) == 'verb':
706 result
.append(chunk(CSNAME
, where
, data
))
708 raise error
, 'premature end of command.' + lle(lvl
, buf
, where
)
710 ##print 'VERB: delimchar ' + `delimchar`
711 pos
= regex
.compile(un_re(delimchar
)).search(buf
, x2
+ 1)
713 raise error
, 'end of \'verb\' argument (' + \
714 `delimchar`
+ ') not found.' + \
716 result
.append(chunk(GROUP
, x2
, [chunk(PLAIN
, x2
+1, (x2
+1, pos
))]))
719 result
.append(chunk(CSNAME
, where
, data
))
722 # this is just a function to get the string value of the possible data-tuple
724 if type(data
) == type(''):
726 if len(data
) != 2 or not (type(data
[0]) == type(data
[1]) == type(0)):
727 raise TypeError, 'expected tuple of 2 integers'
732 ##length, data1, i = getnextarg(length, buf, pp, i + 1)
734 # make a deep-copy of some chunks
738 result
.append(chunkcopy(x
))
743 # copy a chunk, would better be a method of class Chunk...
745 if ch
.chtype
== chunk_type(GROUP
):
747 for i
in range(len(listc
)):
748 listc
[i
] = chunkcopy(listc
[i
])
749 return chunk(GROUP
, ch
.where
, listc
)
751 return chunk(ch
.chtype
, ch
.where
, ch
.data
)
754 # get next argument for TeX-macro, flatten a group (insert between)
755 # or return Command Sequence token, or give back one character
756 def getnextarg(length
, buf
, pp
, item
):
758 ##wobj = Wobj().init()
759 ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
760 ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
762 while item
< length
and pp
[item
].chtype
== chunk_type(ENDLINE
):
766 raise error
, 'no next arg.' + epsilon(buf
, pp
[-1].where
)
767 if pp
[item
].chtype
== chunk_type(GROUP
):
768 newpp
= pp
[item
].data
772 length
= length
+ len(newpp
)
773 pp
[item
:item
] = newpp
774 item
= item
+ len(newpp
)
777 dumpit(buf
, wobj
.write
, newpp
)
778 ##print 'GETNEXTARG: inserted ' + `wobj.data`
780 elif pp
[item
].chtype
== chunk_type(PLAIN
):
782 print 'WARNING: grabbing one char'
783 if len(s(buf
, pp
[item
].data
)) > 1:
784 pp
.insert(item
, chunk(PLAIN
, pp
[item
].where
, s(buf
, pp
[item
].data
)[:1]))
785 item
, length
= item
+1, length
+1
786 pp
[item
].data
= s(buf
, pp
[item
].data
)[1:]
793 str = `
s(buf
, ch
.data
)`
797 str = str[:400] + '...'
798 print 'GETNEXTARG:', ch
.chtype
, 'not handled, data ' + str
802 # this one is needed to find the end of LaTeX's optional argument, like
804 re_endopt
= regex
.compile(']')
806 # get a LaTeX-optional argument, you know, the square braces '[' and ']'
807 def getoptarg(length
, buf
, pp
, item
):
810 dumpit(buf
, wobj
.write
, pp
[item
:min(length
, item
+ 5)])
811 ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
813 if item
>= length
or \
814 pp
[item
].chtype
!= chunk_type(PLAIN
) or \
815 s(buf
, pp
[item
].data
)[0] != '[':
818 pp
[item
].data
= s(buf
, pp
[item
].data
)[1:]
819 if len(pp
[item
].data
) == 0:
825 raise error
, 'No end of optional arg found'
826 if pp
[item
].chtype
== chunk_type(PLAIN
):
827 text
= s(buf
, pp
[item
].data
)
828 pos
= re_endopt
.search(text
)
830 pp
[item
].data
= text
[:pos
]
838 while text
and text
[0] in ' \t':
842 pp
.insert(item
, chunk(PLAIN
, 0, text
))
849 # Wobj just add write-requests to the ``data'' attribute
def write(self, data):
    # Append one write-request to the text collected so far.  The result
    # is rebound to self.data; the previous value is not modified in place.
    collected = self.data
    self.data = collected + data
857 # ignore these commands
# commands that are to be ignored
ignoredcommands = ('bcode', 'ecode')

# commands that map to their own name as plain text
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX')

# escaped characters that stand for themselves (\{ --> {, \} --> }, etc),
# extended with the words listed above
themselves = ('{', '}', '.', '@', ' ', '\n') + wordsselves

# also map to themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')

# how *I* would show the difference between emph and strong:
# a tuple holds the (prefix, suffix) pair, the code 1 means
# "fold to uppercase"
markcmds = {
    'code': ('', ''),
    'var': 1,
    'emph': ('_', '_'),
    'strong': ('*', '*')
}

# recognise pattern {\FONTCHANGE-CMD TEXT} and map it to \MAPPED-FC-CMD{TEXT}
fontchanges = {
    'rm': 'r',
    'it': 'i',
    'em': 'emph',
    'bf': 'b',
    'tt': 't'
}

# transparent for these commands
for_texi = (
    'emph', 'var', 'strong', 'code', 'kbd', 'key',
    'dfn', 'samp', 'file', 'r', 'i', 't'
)
878 # try to remove macros and return flat text
879 def flattext(buf
, pp
):
881 ##print '---> FLATTEXT ' + `pp`
884 i
, length
= 0, len(pp
)
886 if len(pp
) != length
:
887 raise 'FATAL', 'inconsistent length'
892 if ch
.chtype
== chunk_type(PLAIN
):
894 elif ch
.chtype
== chunk_type(CSNAME
):
895 if s(buf
, ch
.data
) in themselves
or hist
.inargs
and s(buf
, ch
.data
) in inargsselves
:
896 ch
.chtype
= chunk_type(PLAIN
)
897 elif s(buf
, ch
.data
) == 'e':
898 ch
.chtype
= chunk_type(PLAIN
)
900 elif len(s(buf
, ch
.data
)) == 1 \
901 and s(buf
, ch
.data
) in onlylatexspecial
:
902 ch
.chtype
= chunk_type(PLAIN
)
903 # if it is followed by an empty group,
904 # remove that group, it was needed for
907 and pp
[i
].chtype
==chunk_type(GROUP
) \
908 and len(pp
[i
].data
) == 0:
912 elif s(buf
, ch
.data
) in markcmds
.keys():
913 length
, newi
= getnextarg(length
, buf
, pp
, i
)
914 str = flattext(buf
, pp
[i
:newi
])
916 length
= length
- (newi
- i
)
917 ch
.chtype
= chunk_type(PLAIN
)
918 markcmd
= s(buf
, ch
.data
)
919 x
= markcmds
[markcmd
]
920 if type(x
) == type(()):
924 str = string
.upper(str)
926 raise 'FATAL', 'corrupt markcmds'
929 if s(buf
, ch
.data
) not in ignoredcommands
:
930 print 'WARNING: deleting command ' + `
s(buf
, ch
.data
)`
931 print 'PP' + `pp
[i
-1]`
933 i
, length
= i
-1, length
-1
934 elif ch
.chtype
== chunk_type(GROUP
):
935 length
, newi
= getnextarg(length
, buf
, pp
, i
-1)
937 ## str = flattext(buf, crcopy(pp[i-1:newi]))
939 ## length = length - (newi - i)
940 ## ch.chtype = chunk_type(PLAIN)
945 dumpit(buf
, wobj
.write
, pp
)
946 ##print 'FLATTEXT: RETURNING ' + `wobj.data`
949 # try to generate node names (a bit shorter than the chapter title)
950 # note that the \nodename command (see elsewhere) overrules these efforts
951 def invent_node_names(text
):
952 words
= string
.split(text
)
954 ##print 'WORDS ' + `words`
957 and string
.lower(words
[0]) == 'built-in' \
958 and string
.lower(words
[1]) not in ('modules', 'functions'):
960 if len(words
) == 3 and string
.lower(words
[1]) == 'module':
962 if len(words
) == 3 and string
.lower(words
[1]) == 'object':
963 return string
.join(words
[0:2])
964 if len(words
) > 4 and string
.lower(string
.join(words
[-4:])) == \
965 'methods and data attributes':
966 return string
.join(words
[:2])
969 re_commas_etc
= regex
.compile('[,`\'@{}]')
971 re_whitespace
= regex
.compile('[ \t]*')
974 ##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
976 # look if the next non-white stuff is also a command, resulting in skipping
977 # double endlines (DENDLINE) too, and thus omitting \par's
978 # Sometimes this is too much, maybe consider DENDLINE's as stop
979 def next_command_p(length
, buf
, pp
, i
, cmdname
):
986 if ch
.chtype
== chunk_type(ENDLINE
):
988 if ch
.chtype
== chunk_type(DENDLINE
):
990 if ch
.chtype
== chunk_type(PLAIN
):
991 if re_whitespace
.search(s(buf
, ch
.data
)) == 0 and \
992 re_whitespace
.match(s(buf
, ch
.data
)) == len(s(buf
, ch
.data
)):
995 if ch
.chtype
== chunk_type(CSNAME
):
996 if s(buf
, ch
.data
) == cmdname
:
997 return i
# _after_ the command
1002 # things that are special to LaTeX, but not to texi..
1003 onlylatexspecial
= '_~^$#&%'
1017 hist
.enumeratenesting
, hist
.itemizenesting
= 0, 0
1019 out
.doublenodes
= []
1020 out
.doublecindeces
= []
1023 spacech
= [chunk(PLAIN
, 0, ' ')]
1024 commach
= [chunk(PLAIN
, 0, ', ')]
1025 cindexch
= [chunk(CSLINE
, 0, 'cindex')]
1027 # the standard variation in symbols for itemize
1028 itemizesymbols
= ['bullet', 'minus', 'dots']
1030 # same for enumerate
1031 enumeratesymbols
= ['1', 'A', 'a']
1034 ## \begin{ {func,data,exc}desc }{name}...
1035 ## the resulting texi-code is dependent on the contents of indexsubitem
1038 # indexsubitem: `['XXX', 'function']
1040 # deffn {`idxsi`} NAME (FUNCARGS)
1042 # indexsubitem: `['XXX', 'method']`
1044 # defmethod {`idxsi[0]`} NAME (FUNCARGS)
1046 # indexsubitem: `['in', 'module', 'MODNAME']'
1048 # defcv data {`idxsi[1:]`} NAME
1050 # defcv exception {`idxsi[1:]`} NAME
1052 # deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
1054 # indexsubitem: `['OBJECT', 'attribute']'
1056 # defcv attribute {`OBJECT`} NAME
1059 ## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1060 ## or \funcline{NAME}{ARGS}
1062 def do_funcdesc(length
, buf
, pp
, i
):
1066 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1067 funcname
= chunk(GROUP
, wh
, pp
[i
:newi
])
1069 length
= length
- (newi
-i
)
1072 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1075 the_args
= [chunk(PLAIN
, wh
, '()'[0])] + \
1077 [chunk(PLAIN
, wh
, '()'[1])]
1079 length
= length
- (newi
-i
)
1081 idxsi
= hist
.indexsubitem
# words
1084 if idxsi
and idxsi
[-1] in ('method', 'protocol'):
1085 command
= 'defmethod'
1086 cat_class
= string
.join(idxsi
[:-1])
1087 elif len(idxsi
) == 2 and idxsi
[1] == 'function':
1089 cat_class
= string
.join(idxsi
)
1090 elif len(idxsi
) == 3 and idxsi
[:2] == ['in', 'module']:
1092 cat_class
= 'function of ' + string
.join(idxsi
[1:])
1095 raise error
, 'don\'t know what to do with indexsubitem ' + `idxsi`
1097 ch
.chtype
= chunk_type(CSLINE
)
1100 cslinearg
= [chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, cat_class
)])]
1101 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1102 cslinearg
.append(funcname
)
1103 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1105 cslinearg
[l
:l
] = the_args
1107 pp
.insert(i
, chunk(GROUP
, wh
, cslinearg
))
1108 i
, length
= i
+1, length
+1
1109 hist
.command
= command
1113 ## this routine will be called on \begin{excdesc}{NAME}
1114 ## or \excline{NAME}
1116 def do_excdesc(length
, buf
, pp
, i
):
1120 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1121 excname
= chunk(GROUP
, wh
, pp
[i
:newi
])
1123 length
= length
- (newi
-i
)
1125 idxsi
= hist
.indexsubitem
# words
1129 if len(idxsi
) == 2 and idxsi
[1] == 'exception':
1131 cat_class
= string
.join(idxsi
)
1132 elif len(idxsi
) == 3 and idxsi
[:2] == ['in', 'module']:
1134 cat_class
= 'exception'
1135 class_class
= string
.join(idxsi
[1:])
1136 elif len(idxsi
) == 4 and idxsi
[:3] == ['exception', 'in', 'module']:
1138 cat_class
= 'exception'
1139 class_class
= string
.join(idxsi
[2:])
1143 raise error
, 'don\'t know what to do with indexsubitem ' + `idxsi`
1145 ch
.chtype
= chunk_type(CSLINE
)
1148 cslinearg
= [chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, cat_class
)])]
1149 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1151 cslinearg
.append(chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, class_class
)]))
1152 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1153 cslinearg
.append(excname
)
1155 pp
.insert(i
, chunk(GROUP
, wh
, cslinearg
))
1156 i
, length
= i
+1, length
+1
1157 hist
.command
= command
1160 ## same for datadesc or dataline...
1161 def do_datadesc(length
, buf
, pp
, i
):
1165 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1166 dataname
= chunk(GROUP
, wh
, pp
[i
:newi
])
1168 length
= length
- (newi
-i
)
1170 idxsi
= hist
.indexsubitem
# words
1174 if idxsi
[-1] in ('attribute', 'option'):
1176 cat_class
= idxsi
[-1]
1177 class_class
= string
.join(idxsi
[:-1])
1178 elif len(idxsi
) == 3 and idxsi
[:2] == ['in', 'module']:
1181 class_class
= string
.join(idxsi
[1:])
1182 elif len(idxsi
) == 4 and idxsi
[:3] == ['data', 'in', 'module']:
1185 class_class
= string
.join(idxsi
[2:])
1189 raise error
, 'don\'t know what to do with indexsubitem ' + `idxsi`
1191 ch
.chtype
= chunk_type(CSLINE
)
1194 cslinearg
= [chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, cat_class
)])]
1195 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1197 cslinearg
.append(chunk(GROUP
, wh
, [chunk(PLAIN
, wh
, class_class
)]))
1198 cslinearg
.append(chunk(PLAIN
, wh
, ' '))
1199 cslinearg
.append(dataname
)
1201 pp
.insert(i
, chunk(GROUP
, wh
, cslinearg
))
1202 i
, length
= i
+1, length
+1
1203 hist
.command
= command
1207 # regular indices: those that are not set in tt font by default....
1208 regindices
= ('cindex', )
1210 # remove illegal characters from node names
1211 def rm_commas_etc(text
):
1215 pos
= re_commas_etc
.search(text
)
1218 result
= result
+ text
[:pos
]
1221 result
= result
+ text
1224 print 'Warning: nodename changhed to ' + `result`
1233 ## changeit: the actual routine, that changes the contents of the parsed
# changeit(buf, pp): walk the chunk list `pp` and rewrite it in place so that
# LaTeX constructs become texinfo ones (CSLINE/CSNAME/GROUP/PLAIN chunks).
#   buf -- source text; s(buf, data) yields the text compared with command names
#   pp  -- list of chunk objects, modified in place
# `length` is a hand-kept copy of len(pp) (sanity-checked against len(pp)) and
# `i` is the working index; the visible inserts and deletes adjust both.
# GROUP chunks are handled by recursing into ch.data.  State is shared through
# the globals `hist` (inenv stack, nesting counters, itemargmacro,
# indexsubitem, nodenames, command, inargs) and `out` (doublenodes).
# NOTE(review): this listing is damaged: every logical line starts with a
# stray number, tokens are split over several physical lines, and the
# numbering jumps (e.g. 1244 -> 1250), so some statements are not shown.  The
# comments added here describe only what the visible lines establish.
1237 def changeit(buf
, pp
):
1238 global onlylatexspecial
, hist
, out
1240 i
, length
= 0, len(pp
)
1242 # sanity check: length should always equal len(pp)
1243 if len(pp
) != length
:
1244 raise 'FATAL', 'inconsistent length. thought ' + `length`
+ ', but should really be ' + `
len(pp
)`
# A plain string among the chunks is fatal.
# NOTE(review): `end` is not assigned in the visible part of this function;
# unless it is a module global, building this message raises NameError.
1250 if type(ch
) == type(''):
1251 #normally, only chunks are present in pp,
1252 # but in some cases, some extra info
1253 # has been inserted, e.g., the \end{...} clauses
1254 raise 'FATAL', 'got string, probably too many ' + `end`
# GROUP: if the group starts with a CSNAME that is a key of fontchanges, a
# CSNAME chunk holding fontchanges[k] is inserted at pp[i-1]; the group
# contents are then processed recursively.
1256 if ch
.chtype
== chunk_type(GROUP
):
1257 # check for {\em ...} constructs
1259 ch
.data
[0].chtype
== chunk_type(CSNAME
) and \
1260 s(buf
, ch
.data
[0].data
) in fontchanges
.keys():
1261 k
= s(buf
, ch
.data
[0].data
)
1263 pp
.insert(i
-1, chunk(CSNAME
, ch
.where
, fontchanges
[k
]))
1264 length
, i
= length
+1, i
+1
1266 # recursively parse the contents of the group
1267 changeit(buf
, ch
.data
)
# IF: ch.data unpacks to (flag, negate, data); a flag that is not a key of
# `flags` raises `error`.
1269 elif ch
.chtype
== chunk_type(IF
):
1271 flag
, negate
, data
= ch
.data
1272 ##print 'IF: flag, negate = ' + `flag, negate`
1273 if flag
not in flags
.keys():
1274 raise error
, 'unknown flag ' + `flag`
1280 length
, i
= length
-1, i
-1
1283 length
= length
+ len(data
)
# ENV: ch.data unpacks to (envname, data).  The name is pushed on hist.inenv,
# an ENDENV chunk is appended to data, and the texinfo opening chunks for the
# environment are inserted at pp[i].
1286 elif ch
.chtype
== chunk_type(ENV
):
1288 envname
, data
= ch
.data
1290 #push this environment name on stack
1291 hist
.inenv
.insert(0, envname
)
1293 #append an endenv chunk after grouped data
1294 data
.append(chunk(ENDENV
, ch
.where
, envname
))
1299 i
, length
= i
-1, length
-1
1303 length
= length
+ len(data
)
1305 if envname
== 'verbatim':
1306 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'example'), \
1307 chunk(GROUP
, ch
.where
, [])]
1308 length
, i
= length
+2, i
+2
1310 elif envname
== 'itemize':
# NOTE(review): the guard below uses `>` while the list is indexed with
# hist.itemizenesting right after; nesting == len(itemizesymbols) passes the
# guard and fails with IndexError -- `>=` looks intended.
1311 if hist
.itemizenesting
> len(itemizesymbols
):
1312 raise error
, 'too deep itemize nesting'
1313 ingroupch
= [chunk(CSNAME
, ch
.where
,\
1314 itemizesymbols
[hist
.itemizenesting
])]
1315 hist
.itemizenesting
= hist
.itemizenesting
+ 1
1316 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'itemize'),\
1317 chunk(GROUP
, ch
.where
, ingroupch
)]
1318 length
, i
= length
+2, i
+2
1320 elif envname
== 'enumerate':
# NOTE(review): same off-by-one as the itemize guard: `>` lets
# nesting == len(enumeratesymbols) through to the indexing below.
1321 if hist
.enumeratenesting
> len(enumeratesymbols
):
1322 raise error
, 'too deep enumerate nesting'
1323 ingroupch
= [chunk(PLAIN
, ch
.where
,\
1324 enumeratesymbols
[hist
.enumeratenesting
])]
1325 hist
.enumeratenesting
= hist
.enumeratenesting
+ 1
1326 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'enumerate'),\
1327 chunk(GROUP
, ch
.where
, ingroupch
)]
1328 length
, i
= length
+2, i
+2
1330 elif envname
== 'description':
1331 ingroupch
= [chunk(CSNAME
, ch
.where
, 'b')]
1332 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'table'), \
1333 chunk(GROUP
, ch
.where
, ingroupch
)]
1334 length
, i
= length
+2, i
+2
# tableii / tableiii: `newcode` collects a 'table' CSLINE, an 'asis' group and
# an 'item' CSLINE; the cells come from arguments fetched with getnextarg(),
# and the macro-name argument is remembered in hist.itemargmacro.
1336 elif (envname
== 'tableiii') or \
1337 (envname
== 'tableii'):
1338 if (envname
== 'tableii'):
1345 #delete tabular format description
1347 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1349 length
= length
- (newi
-i
)
1351 newcode
.append(chunk(CSLINE
, wh
, 'table'))
1352 ingroupch
= [chunk(CSNAME
, wh
, 'asis')]
1353 newcode
.append(chunk(GROUP
, wh
, ingroupch
))
1354 newcode
.append(chunk(CSLINE
, wh
, 'item'))
1356 #get the name of macro for @item
1358 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1361 raise error
, 'Sorry, expected 1 chunk argument'
1362 if pp
[i
].chtype
!= chunk_type(PLAIN
):
1363 raise error
, 'Sorry, expected plain text argument'
1364 hist
.itemargmacro
= s(buf
, pp
[i
].data
)
1366 length
= length
- (newi
-i
)
1369 for count
in range(ltable
):
1370 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1372 chunk(CSNAME
, wh
, 'emph'), \
1373 chunk(GROUP
, 0, pp
[i
:newi
])]
1375 length
= length
- (newi
-i
)
1378 elif count
== ltable
-1:
1379 itembody
= itembody
+ \
1380 [chunk(PLAIN
, wh
, ' --- ')] + \
1383 itembody
= emphgroup
1384 newcode
.append(chunk(GROUP
, wh
, itemarg
))
1385 newcode
= newcode
+ itembody
+ [chunk(DENDLINE
, wh
, '\n')]
1388 length
, i
= length
+l
, i
+l
1391 if length
!= len(pp
):
1392 raise 'STILL, SOMETHING wrong', `i`
# funcdesc / excdesc / datadesc: an empty PLAIN chunk is inserted and the
# matching do_*desc() helper returns the updated (length, i).
1395 elif envname
== 'funcdesc':
1396 pp
.insert(i
, chunk(PLAIN
, ch
.where
, ''))
1397 i
, length
= i
+1, length
+1
1398 length
, i
= do_funcdesc(length
, buf
, pp
, i
)
1400 elif envname
== 'excdesc':
1401 pp
.insert(i
, chunk(PLAIN
, ch
.where
, ''))
1402 i
, length
= i
+1, length
+1
1403 length
, i
= do_excdesc(length
, buf
, pp
, i
)
1405 elif envname
== 'datadesc':
1406 pp
.insert(i
, chunk(PLAIN
, ch
.where
, ''))
1407 i
, length
= i
+1, length
+1
1408 length
, i
= do_datadesc(length
, buf
, pp
, i
)
1411 print 'WARNING: don\'t know what to do with env ' + `envname`
# ENDENV: the name has to match the innermost open environment,
# hist.inenv[0]; each known environment then gets its 'end' CSLINE followed
# by a GROUP naming the texinfo environment being closed.
1413 elif ch
.chtype
== chunk_type(ENDENV
):
1415 if envname
!= hist
.inenv
[0]:
1416 raise error
, '\'end\' does not match. Name ' + `envname`
+ ', expected ' + `hist
.inenv
[0]`
1419 i
, length
= i
-1, length
-1
1421 if envname
== 'verbatim':
1423 chunk(CSLINE
, ch
.where
, 'end'), \
1424 chunk(GROUP
, ch
.where
, [\
1425 chunk(PLAIN
, ch
.where
, 'example')])]
1426 i
, length
= i
+2, length
+2
1427 elif envname
== 'itemize':
1428 hist
.itemizenesting
= hist
.itemizenesting
- 1
1430 chunk(CSLINE
, ch
.where
, 'end'), \
1431 chunk(GROUP
, ch
.where
, [\
1432 chunk(PLAIN
, ch
.where
, 'itemize')])]
1433 i
, length
= i
+2, length
+2
1434 elif envname
== 'enumerate':
1435 hist
.enumeratenesting
= hist
.enumeratenesting
-1
1437 chunk(CSLINE
, ch
.where
, 'end'), \
1438 chunk(GROUP
, ch
.where
, [\
1439 chunk(PLAIN
, ch
.where
, 'enumerate')])]
1440 i
, length
= i
+2, length
+2
1441 elif envname
== 'description':
1443 chunk(CSLINE
, ch
.where
, 'end'), \
1444 chunk(GROUP
, ch
.where
, [\
1445 chunk(PLAIN
, ch
.where
, 'table')])]
1446 i
, length
= i
+2, length
+2
1447 elif (envname
== 'tableiii') or (envname
== 'tableii'):
1449 chunk(CSLINE
, ch
.where
, 'end'), \
1450 chunk(GROUP
, ch
.where
, [\
1451 chunk(PLAIN
, ch
.where
, 'table')])]
1452 i
, length
= i
+2, length
+ 2
1453 pp
.insert(i
, chunk(DENDLINE
, ch
.where
, '\n'))
1454 i
, length
= i
+1, length
+1
1456 elif envname
in ('funcdesc', 'excdesc', 'datadesc'):
1458 chunk(CSLINE
, ch
.where
, 'end'), \
1459 chunk(GROUP
, ch
.where
, [\
1460 chunk(PLAIN
, ch
.where
, hist
.command
)])]
1461 i
, length
= i
+2, length
+2
# NOTE(review): the message below lacks a space before 'has'.
1463 print 'WARNING: ending env ' + `envname`
+ 'has no actions'
# CSNAME: per-command rewrites, selected by the name s(buf, ch.data).
1465 elif ch
.chtype
== chunk_type(CSNAME
):
1466 # control name transformations
1467 if s(buf
, ch
.data
) == 'optional':
1468 pp
[i
-1].chtype
= chunk_type (PLAIN
)
1470 if (i
< length
) and \
1471 (pp
[i
].chtype
== chunk_type(GROUP
)):
1474 chunk(PLAIN
, ch
.where
, ']')]
1475 length
= length
+len(cp
)
1476 elif s(buf
, ch
.data
) in ignoredcommands
:
1478 i
, length
= i
-1, length
-1
1479 elif s(buf
, ch
.data
) == '@' and \
1481 pp
[i
].chtype
== chunk_type(PLAIN
) and \
1482 s(buf
, pp
[i
].data
)[0] == '.':
1487 elif s(buf
, ch
.data
) == '\\':
1490 elif len(s(buf
, ch
.data
)) == 1 and \
1491 s(buf
, ch
.data
) in onlylatexspecial
:
1492 ch
.chtype
= chunk_type(PLAIN
)
1493 # check if such a command is followed by
1494 # an empty group: e.g., `\%{}'. If so, remove
1495 # this empty group too
1497 pp
[i
].chtype
== chunk_type(GROUP
) \
1498 and len(pp
[i
].data
) == 0:
1502 elif hist
.inargs
and s(buf
, ch
.data
) in inargsselves
:
1503 # This is the special processing of the
1504 # arguments of the \begin{funcdesc}... or
1505 # \funcline... arguments
1506 # \, --> , \[ --> [, \] --> ]
1507 ch
.chtype
= chunk_type(PLAIN
)
# \renewcommand: only the \indexsubitem form is interpreted -- its argument
# text must start with '(' and end with ')' and is split into words stored in
# hist.indexsubitem; the other visible path prints a warning.
# NOTE(review): the error text says 'braces' although parentheses are
# checked, and it misspells 'indexsubitem'.
1509 elif s(buf
, ch
.data
) == 'renewcommand':
1510 # \renewcommand{\indexsubitem}....
1511 i
, length
= i
-1, length
-1
1513 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1516 and pp
[i
].chtype
== chunk_type(CSNAME
) \
1517 and s(buf
, pp
[i
].data
) == 'indexsubitem':
1519 length
= length
- (newi
-i
)
1520 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1521 text
= flattext(buf
, pp
[i
:newi
])
1522 if text
[:1] != '(' or text
[-1:] != ')':
1523 raise error
, 'expected indexsubitme enclosed in braces'
1524 words
= string
.split(text
[1:-1])
1525 hist
.indexsubitem
= words
1528 print 'WARNING: renewcommand with unsupported arg removed'
1530 length
= length
- (newi
-i
)
1532 elif s(buf
, ch
.data
) == 'item':
1533 ch
.chtype
= chunk_type(CSLINE
)
1534 length
, newi
= getoptarg(length
, buf
, pp
, i
)
1535 ingroupch
= pp
[i
:newi
]
1537 length
= length
- (newi
-i
)
1538 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1539 i
, length
= i
+1, length
+1
# \ttindex: the index category is looked up from hist.indexsubitem[1]; the
# visible lines wrap the argument in a 't' group when `command` is in
# regindices and build an 'r' group around the category text.
1541 elif s(buf
, ch
.data
) == 'ttindex':
1542 idxsi
= hist
.indexsubitem
1545 if len(idxsi
) >= 2 and idxsi
[1] in \
1546 ('method', 'function', 'protocol'):
1548 elif len(idxsi
) >= 2 and idxsi
[1] in \
1549 ('exception', 'object'):
1552 print 'WARNING: can\'t categorize ' + `idxsi`
+ ' for \'ttindex\' command'
1556 cat_class
= '('+string
.join(idxsi
)+')'
1558 ch
.chtype
= chunk_type(CSLINE
)
1561 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1564 length
= length
- (newi
-i
)
1566 cat_arg
= [chunk(PLAIN
, ch
.where
, cat_class
)]
1568 # determine what should be set in roman, and
1570 if command
in regindices
:
1572 arg
= [chunk(CSNAME
, ch
.where
, 't'), \
1573 chunk(GROUP
, ch
.where
, arg
)]
1575 cat_arg
= [chunk(CSNAME
, ch
.where
, 'r'), \
1576 chunk(GROUP
, ch
.where
, cat_arg
)]
1579 [chunk(PLAIN
, ch
.where
, ' ')] + \
1582 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1583 length
, i
= length
+1, i
+1
1586 elif s(buf
, ch
.data
) == 'ldots':
1587 # \ldots --> \dots{} --> @dots{}
1590 or pp
[i
].chtype
!= chunk_type(GROUP
) \
1591 or pp
[i
].data
!= []:
1592 pp
.insert(i
, chunk(GROUP
, ch
.where
, []))
1593 i
, length
= i
+1, length
+1
1594 elif s(buf
, ch
.data
) in wordsselves
:
1596 ch
.chtype
= chunk_type(PLAIN
)
1598 and pp
[i
].chtype
== chunk_type(GROUP
) \
1599 and pp
[i
].data
== []:
1602 elif s(buf
, ch
.data
) in for_texi
:
1605 elif s(buf
, ch
.data
) == 'e':
1608 ch
.chtype
= chunk_type(PLAIN
)
1609 elif (s(buf
, ch
.data
) == 'lineiii') or\
1610 (s(buf
, ch
.data
) == 'lineii'):
1611 # This is the most tricky one
1612 # \lineiii{a1}{a2}[{a3}] -->
1613 # @item @<cts. of itemargmacro>{a1}
1616 ##print 'LINEIIIIII!!!!!!!'
1617 ## wobj = Wobj().init()
1618 ## dumpit(buf, wobj.write, pp[i-1:i+5])
1619 ## print '--->' + wobj.data + '<----'
1622 'no environment for lineiii'
1623 if (hist
.inenv
[0] != 'tableiii') and\
1624 (hist
.inenv
[0] != 'tableii'):
1626 'wrong command (' + \
1628 ') in wrong environment (' \
1629 + `hist
.inenv
[0]`
+ ')'
1630 ch
.chtype
= chunk_type(CSLINE
)
1632 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1633 ingroupch
= [chunk(CSNAME
, 0, \
1634 hist
.itemargmacro
), \
1635 chunk(GROUP
, 0, pp
[i
:newi
])]
1637 length
= length
- (newi
-i
)
1638 ## print 'ITEM ARG: --->',
1639 ## wobj = Wobj().init()
1640 ## dumpit(buf, wobj.write, ingroupch)
1641 ## print wobj.data, '<---'
1642 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1644 i
, length
= i
+1, length
+1
1645 length
, i
= getnextarg(length
, buf
, pp
, i
)
1646 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1649 pp
.insert(i
, chunk(PLAIN
, ch
.where
, ' --- '))
1652 ## pp[grouppos].data = pp[grouppos].data \
1653 ## + [chunk(PLAIN, ch.where, ' ')] \
1656 ## length = length - (newi-i)
1657 if length
!= len(pp
):
1658 raise 'IN LINEIII IS THE ERR', `i`
# Sectioning commands: the node name comes from the title text (through
# invent_node_names) or from the argument of a following 'nodename' command;
# a name already in hist.nodenames is reported and added to out.doublenodes.
# After rm_commas_etc(), a 'node' CSLINE with the group '<name>, , ,' is built.
1660 elif s(buf
, ch
.data
) in ('chapter', 'section', 'subsection', 'subsubsection'):
1661 #\xxxsection{A} ---->
1664 ## also: remove commas and quotes
1665 ch
.chtype
= chunk_type(CSLINE
)
1666 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1667 afternodenamecmd
= next_command_p(length
, buf
, pp
, newi
, 'nodename')
1668 if afternodenamecmd
< 0:
1669 cp1
= crcopy(pp
[i
:newi
])
1671 chunk(GROUP
, ch
.where
, \
1673 length
, newi
= length
- (newi
-i
) + 1, \
1675 text
= flattext(buf
, cp1
)
1676 text
= invent_node_names(text
)
1678 length
, endarg
= getnextarg(length
, buf
, pp
, afternodenamecmd
)
1679 cp1
= crcopy(pp
[afternodenamecmd
:endarg
])
1681 length
= length
- (endarg
-newi
)
1684 chunk(GROUP
, ch
.where
, \
1686 length
, newi
= length
- (newi
-i
) + 1, \
1688 text
= flattext(buf
, cp1
)
1691 ## print 'FLATTEXT:', `text`
1692 if text
in hist
.nodenames
:
1693 print 'WARNING: node name ' + `text`
+ ' already used'
1694 out
.doublenodes
.append(text
)
1696 hist
.nodenames
.append(text
)
1697 text
= rm_commas_etc(text
)
1699 chunk(CSLINE
, ch
.where
, 'node'), \
1700 chunk(GROUP
, ch
.where
, [\
1701 chunk(PLAIN
, ch
.where
, text
+', , ,')\
1703 i
, length
= newi
+2, length
+2
# \funcline, \dataline, \excline: build an 'end' CSLINE with a group naming
# hist.command, then hand over to the corresponding do_*desc() helper.
1705 elif s(buf
,ch
.data
) == 'funcline':
1706 # fold it to a very short environment
1708 chunk(CSLINE
, ch
.where
, 'end'), \
1709 chunk(GROUP
, ch
.where
, [\
1710 chunk(PLAIN
, ch
.where
, hist
.command
)])]
1711 i
, length
= i
+2, length
+2
1712 length
, i
= do_funcdesc(length
, buf
, pp
, i
)
1714 elif s(buf
,ch
.data
) == 'dataline':
1716 chunk(CSLINE
, ch
.where
, 'end'), \
1717 chunk(GROUP
, ch
.where
, [\
1718 chunk(PLAIN
, ch
.where
, hist
.command
)])]
1719 i
, length
= i
+2, length
+2
1720 length
, i
= do_datadesc(length
, buf
, pp
, i
)
1722 elif s(buf
,ch
.data
) == 'excline':
1724 chunk(CSLINE
, ch
.where
, 'end'), \
1725 chunk(GROUP
, ch
.where
, [\
1726 chunk(PLAIN
, ch
.where
, hist
.command
)])]
1727 i
, length
= i
+2, length
+2
1728 length
, i
= do_excdesc(length
, buf
, pp
, i
)
# \index and the *index commands below: the command chunk becomes a CSLINE and
# its argument chunks are collected into one GROUP inserted at pp[i]; several
# of them append ' ' and an 'r' group holding a qualifier to that group.
1731 elif s(buf
, ch
.data
) == 'index':
1734 ch
.chtype
= chunk_type(CSLINE
)
1736 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1738 ingroupch
= pp
[i
:newi
]
1740 length
= length
- (newi
-i
)
1741 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1742 length
, i
= length
+1, i
+1
1744 elif s(buf
, ch
.data
) == 'bifuncindex':
1745 ch
.chtype
= chunk_type(CSLINE
)
1747 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1748 ingroupch
= pp
[i
:newi
]
1750 length
= length
- (newi
-i
)
1752 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1753 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1754 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1755 chunk(PLAIN
, ch
.where
, \
1756 '(built-in function)')]))
1758 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1759 length
, i
= length
+1, i
+1
1762 elif s(buf
, ch
.data
) == 'obindex':
1763 ch
.chtype
= chunk_type(CSLINE
)
1765 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1766 ingroupch
= pp
[i
:newi
]
1768 length
= length
- (newi
-i
)
1770 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1771 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1772 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1773 chunk(PLAIN
, ch
.where
, \
1776 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1777 length
, i
= length
+1, i
+1
1780 elif s(buf
, ch
.data
) == 'opindex':
1781 ch
.chtype
= chunk_type(CSLINE
)
1783 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1784 ingroupch
= pp
[i
:newi
]
1786 length
= length
- (newi
-i
)
1788 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1789 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1790 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1791 chunk(PLAIN
, ch
.where
, \
1794 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1795 length
, i
= length
+1, i
+1
1798 elif s(buf
, ch
.data
) == 'bimodindex':
1799 ch
.chtype
= chunk_type(CSLINE
)
1801 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1802 ingroupch
= pp
[i
:newi
]
1804 length
= length
- (newi
-i
)
1806 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1807 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1808 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1809 chunk(PLAIN
, ch
.where
, \
1812 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1813 length
, i
= length
+1, i
+1
1815 elif s(buf
, ch
.data
) == 'sectcode':
1819 elif s(buf
, ch
.data
) == 'stmodindex':
1820 ch
.chtype
= chunk_type(CSLINE
)
1821 # use the program index as module index
1823 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1824 ingroupch
= pp
[i
:newi
]
1826 length
= length
- (newi
-i
)
1828 ingroupch
.append(chunk(PLAIN
, ch
.where
, ' '))
1829 ingroupch
.append(chunk(CSNAME
, ch
.where
, 'r'))
1830 ingroupch
.append(chunk(GROUP
, ch
.where
, [\
1831 chunk(PLAIN
, ch
.where
, \
1834 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1835 length
, i
= length
+1, i
+1
1838 elif s(buf
, ch
.data
) == 'stindex':
1839 # XXX must actually go to newindex st
1841 ch
.chtype
= chunk_type(CSLINE
)
1843 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1844 ingroupch
= [chunk(CSNAME
, wh
, 'code'), \
1845 chunk(GROUP
, wh
, pp
[i
:newi
])]
1848 length
= length
- (newi
-i
)
1851 t
.append(chunk(PLAIN
, wh
, ' statement'))
1853 pp
.insert(i
, chunk(GROUP
, wh
, t
))
1854 i
, length
= i
+1, length
+1
1856 pp
.insert(i
, chunk(CSLINE
, wh
, 'cindex'))
1857 i
, length
= i
+1, length
+1
1860 t
.insert(0, chunk(PLAIN
, wh
, 'statement, '))
1862 pp
.insert(i
, chunk(GROUP
, wh
, t
))
1863 i
, length
= i
+1, length
+1
# \indexii, \indexiii, \indexiv: the arguments are fetched one by one with
# getnextarg() and copied with crcopy(); the copies feed the additional
# 'cindex' CSLINE entries that list the arguments in rotated order.
1866 elif s(buf
, ch
.data
) == 'indexii':
1867 #\indexii{A}{B} --->
1870 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1872 cp21
= crcopy(pp
[i
:newi
])
1874 length
= length
- (newi
-i
)
1875 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1877 cp22
= crcopy(pp
[i
:newi
])
1879 length
= length
- (newi
-i
)
1881 ch
.chtype
= chunk_type(CSLINE
)
1883 pp
.insert(i
, chunk(GROUP
, ch
.where
, cp11
+ [\
1884 chunk(PLAIN
, ch
.where
, ' ')] + cp12
))
1885 i
, length
= i
+1, length
+1
1886 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'cindex'), \
1887 chunk(GROUP
, ch
.where
, cp22
+ [\
1888 chunk(PLAIN
, ch
.where
, ', ')]+ cp21
)]
1889 i
, length
= i
+2, length
+2
1891 elif s(buf
, ch
.data
) == 'indexiii':
1892 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1894 cp21
= crcopy(pp
[i
:newi
])
1895 cp31
= crcopy(pp
[i
:newi
])
1897 length
= length
- (newi
-i
)
1898 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1900 cp22
= crcopy(pp
[i
:newi
])
1901 cp32
= crcopy(pp
[i
:newi
])
1903 length
= length
- (newi
-i
)
1904 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1906 cp23
= crcopy(pp
[i
:newi
])
1907 cp33
= crcopy(pp
[i
:newi
])
1909 length
= length
- (newi
-i
)
1911 ch
.chtype
= chunk_type(CSLINE
)
1913 pp
.insert(i
, chunk(GROUP
, ch
.where
, cp11
+ [\
1914 chunk(PLAIN
, ch
.where
, ' ')] + cp12 \
1915 + [chunk(PLAIN
, ch
.where
, ' ')] \
1917 i
, length
= i
+1, length
+1
1918 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'cindex'), \
1919 chunk(GROUP
, ch
.where
, cp22
+ [\
1920 chunk(PLAIN
, ch
.where
, ' ')]+ cp23\
1921 + [chunk(PLAIN
, ch
.where
, ', ')] +\
1923 i
, length
= i
+2, length
+2
1924 pp
[i
:i
] = [chunk(CSLINE
, ch
.where
, 'cindex'), \
1925 chunk(GROUP
, ch
.where
, cp33
+ [\
1926 chunk(PLAIN
, ch
.where
, ', ')]+ cp31\
1927 + [chunk(PLAIN
, ch
.where
, ' ')] +\
1929 i
, length
= i
+2, length
+2
1932 elif s(buf
, ch
.data
) == 'indexiv':
1933 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1935 cp21
= crcopy(pp
[i
:newi
])
1936 cp31
= crcopy(pp
[i
:newi
])
1937 cp41
= crcopy(pp
[i
:newi
])
1939 length
= length
- (newi
-i
)
1940 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1942 cp22
= crcopy(pp
[i
:newi
])
1943 cp32
= crcopy(pp
[i
:newi
])
1944 cp42
= crcopy(pp
[i
:newi
])
1946 length
= length
- (newi
-i
)
1947 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1949 cp23
= crcopy(pp
[i
:newi
])
1950 cp33
= crcopy(pp
[i
:newi
])
1951 cp43
= crcopy(pp
[i
:newi
])
1953 length
= length
- (newi
-i
)
1954 length
, newi
= getnextarg(length
, buf
, pp
, i
)
1956 cp24
= crcopy(pp
[i
:newi
])
1957 cp34
= crcopy(pp
[i
:newi
])
1958 cp44
= crcopy(pp
[i
:newi
])
1960 length
= length
- (newi
-i
)
1962 ch
.chtype
= chunk_type(CSLINE
)
1964 ingroupch
= cp11
+ \
1968 pp
.insert(i
, chunk(GROUP
, ch
.where
, ingroupch
))
1969 i
, length
= i
+1, length
+1
1970 ingroupch
= cp22
+ \
1974 pp
[i
:i
] = cindexch
+ [\
1975 chunk(GROUP
, ch
.where
, ingroupch
)]
1976 i
, length
= i
+2, length
+2
1977 ingroupch
= cp33
+ \
1981 pp
[i
:i
] = cindexch
+ [\
1982 chunk(GROUP
, ch
.where
, ingroupch
)]
1983 i
, length
= i
+2, length
+2
1984 ingroupch
= cp44
+ \
1988 pp
[i
:i
] = cindexch
+ [\
1989 chunk(GROUP
, ch
.where
, ingroupch
)]
1990 i
, length
= i
+2, length
+2
1995 print 'don\'t know what to do with keyword ' + `
s(buf
, ch
.data
)`
# Matches the characters that dumpit() writes with a leading '@' when they
# occur in plain text: '@', '{' and '}'.
re_atsign = regex.compile('[@{}]')
# Matches a newline; dumpit() replaces each one found inside a control-line
# argument with a space and prints a warning.
re_newline = regex.compile('\n')
# dumpit(buf, wm, pp): write the chunk list `pp` out as texinfo text.
#   buf -- source text used by s(buf, data)
#   wm  -- callable taking one string; called with each piece of output
#   pp  -- list of chunks; GROUP chunks are dumped recursively
# `length` mirrors len(pp) and a mismatch raises 'FATAL'.
# NOTE(review): damaged listing (stray leading numbers, split tokens, gaps in
# the numbering such as 2012 -> 2024); the comments added here describe only
# the statements that are visible.
2002 def dumpit(buf
, wm
, pp
):
2006 i
, length
= 0, len(pp
)
2011 if len(pp
) != length
:
2012 raise 'FATAL', 'inconsistent length'
# CSNAME: written as '@' followed by the control-sequence name.
2024 if ch
.chtype
== chunk_type(CSNAME
):
2025 wm('@' + s(buf
, ch
.data
))
2026 if s(buf
, ch
.data
) == 'node' and \
2027 pp
[i
].chtype
== chunk_type(PLAIN
) and \
2028 s(buf
, pp
[i
].data
) in out
.doublenodes
:
2029 ##XXX doesnt work yet??
2030 wm(' ZZZ-' + zfill(`i`
, 4))
2031 if s(buf
, ch
.data
)[0] in string
.letters
:
# PLAIN: a '@', '{' or '}' located by re_atsign is written with a leading '@'.
2033 elif ch
.chtype
== chunk_type(PLAIN
):
2034 if dospace
and s(buf
, ch
.data
) not in (' ', '\t'):
2036 text
= s(buf
, ch
.data
)
2038 pos
= re_atsign
.search(text
)
2041 wm(text
[:pos
] + '@' + text
[pos
])
2044 elif ch
.chtype
== chunk_type(GROUP
):
2046 dumpit(buf
, wm
, ch
.data
)
2048 elif ch
.chtype
== chunk_type(DENDLINE
):
2050 while i
!= length
and pp
[i
].chtype
in \
2051 (chunk_type(DENDLINE
), chunk_type(ENDLINE
)):
2053 elif ch
.chtype
== chunk_type(OTHER
):
2055 elif ch
.chtype
== chunk_type(ACTIVE
):
2057 elif ch
.chtype
== chunk_type(ENDLINE
):
# CSLINE: written as '@' + name.  The next chunk must be a GROUP holding a
# list; it is rendered into a scratch Wobj through a recursive dumpit() call,
# and a newline found in that text is written as a space (with a warning).
# NOTE(review): the [-1] and [0] subscripts on s(buf, ...) below raise
# IndexError if that text is empty.
2059 elif ch
.chtype
== chunk_type(CSLINE
):
2060 if i
>= 2 and pp
[i
-2].chtype
not in \
2061 (chunk_type(ENDLINE
), chunk_type(DENDLINE
)) \
2062 and (pp
[i
-2].chtype
!= chunk_type(PLAIN
) \
2063 or s(buf
, pp
[i
-2].data
)[-1] != '\n'):
2066 wm('@' + s(buf
, ch
.data
))
2068 raise error
, 'CSLINE expected another chunk'
2069 if pp
[i
].chtype
!= chunk_type(GROUP
):
2070 raise error
, 'CSLINE expected GROUP'
2071 if type(pp
[i
].data
) != type([]):
2072 raise error
, 'GROUP chould contain []-data'
2074 wobj
= Wobj().init()
2075 dumpit(buf
, wobj
.write
, pp
[i
].data
)
2082 pos
= re_newline
.search(text
)
2085 print 'WARNING: found newline in csline arg'
2086 wm(text
[:pos
] + ' ')
2090 pp
[i
].chtype
not in (chunk_type(CSLINE
), \
2091 chunk_type(ENDLINE
), chunk_type(DENDLINE
)) \
2092 and (pp
[i
].chtype
!= chunk_type(PLAIN
) \
2093 or s(buf
, pp
[i
].data
)[0] != '\n'):
# COMMENT: a comment whose text is neither empty nor all blanks/tabs is
# written as '@c ' + text.
2096 elif ch
.chtype
== chunk_type(COMMENT
):
2097 ## print 'COMMENT: previous chunk =', pp[i-2]
2098 ## if pp[i-2].chtype == chunk_type(PLAIN):
2099 ## print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
2100 if s(buf
, ch
.data
) and \
2101 regex
.match('^[ \t]*$', s(buf
, ch
.data
)) < 0:
2102 if i
>= 2 and pp
[i
-2].chtype
not in \
2103 (chunk_type(ENDLINE
), chunk_type(DENDLINE
)) \
2104 and not (pp
[i
-2].chtype
== chunk_type(PLAIN
) \
2105 and regex
.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf
, pp
[i
-2].data
)) >= 0):
2106 print 'ADDING NEWLINE'
2108 wm('@c ' + s(buf
, ch
.data
))
2109 elif ch
.chtype
== chunk_type(IGNORE
):
# Unhandled chunk type: a warning is printed with the repr of the chunk data,
# cut to 400 characters plus '...'.
# NOTE(review): the local name `str` shadows the builtin.
2113 str = `
s(buf
, ch
.data
)`
2117 str = str[:400] + '...'
2118 print 'warning:', ch
.chtype
, 'not handled, data ' + str
# Command-line driver statements (the enclosing `def` line is not part of
# this listing).  Options parsed with getopt: -o outfile, -h headerfile,
# -t trailerfile; the header/trailer names default to the two .dat files below.
# NOTE(review): damaged listing -- some statements between the numbered lines
# are not shown.
2124 headerfile
= 'texipre.dat'
2125 trailerfile
= 'texipost.dat'
2128 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'o:h:t:')
2129 except getopt
.error
:
2133 print 'usage: partparse [-o outfile] [-h headerfile]',
2134 print '[-t trailerfile] file ...'
2137 for opt
, arg
in opts
:
2138 if opt
== '-o': outfile
= arg
2139 if opt
== '-h': headerfile
= arg
2140 if opt
== '-t': trailerfile
= arg
# Output name derived from the first input file: its extension is replaced by
# '.texi' (presumably only when -o was not given -- the guard is not visible).
2143 root
, ext
= os
.path
.splitext(args
[0])
2144 outfile
= root
+ '.texi'
2147 print 'will not overwrite input file', outfile
# The output file receives the header file's contents, then the dump of each
# parsed input, then the trailer file's contents.
# NOTE(review): none of the files opened here is closed in the visible lines.
2150 outf
= open(outfile
, 'w')
2151 outf
.write(open(headerfile
, 'r').read())
2154 if len(args
) > 1: print '='*20, file, '='*20
2155 buf
= open(file, 'r').read()
2156 w
, pp
= parseit(buf
)
2159 dumpit(buf
, outf
.write
, pp
)
2161 outf
.write(open(trailerfile
, 'r').read())