(py-outdent-p): new function
[python/dscho.git] / Doc / partparse.py
bloba62d6947f199dcbf98fd7b049eacb1ea74c79000
# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
# and generate texinfo source.
#
# This is *not* a good example of good programming practices. In fact, this
# file could use a complete rewrite, in order to become faster, more
# easily extensible and maintainable.
#
# However, I added some comments in a few places for the pitiful person who
# would ever need to take a look into this file.
#
# Have I been clear enough??
#
# -jh
17 import sys, string, regex, getopt, os
19 # Different parse modes for phase 1
# The phase-1 parse modes are numbered consecutively from zero.
(MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
 MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE) = range(7)

# Every valid parse mode; Mode.init() validates its argument against this.
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
             MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
31 # Show the neighbourhood of the scanned buffer
def epsilon(buf, where):
    """Return a debug string showing the text around position *where*.

    Up to ten characters before and ten characters after *where* are
    shown, each as a repr, clipped to the bounds of *buf*.
    """
    lo = max(where - 10, 0)
    hi = min(where + 10, len(buf))
    return ' Context %s.%s.' % (repr(buf[lo:where]), repr(buf[where:hi]))
40 # Should return the line number. never worked
def lin():
    """Return a debug string with the module-level line counter ``lineno``.

    ``lineno`` is only read here, so no ``global`` declaration is needed.
    """
    return ' Line %s.' % repr(lineno)
45 # Displays the recursion level.
def lv(lvl):
    """Return a debug string naming the recursion level *lvl*."""
    return ' Level %s.' % repr(lvl)
49 # Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    """Return the level, line and context debug strings glued together."""
    level_part = lv(lvl)
    line_part = lin()
    context_part = epsilon(buf, where)
    return level_part + line_part + context_part
54 # This class is only needed for _symbolic_ representation of the parse mode.
class Mode:
    """Symbolic wrapper around one of the MODE_* parse-mode constants.

    Instances compare by the wrapped constant and print as its name.
    """

    # (constant, printable name) pairs consulted by __repr__
    _names = (
        (MODE_REGULAR, 'MODE_REGULAR'),
        (MODE_VERBATIM, 'MODE_VERBATIM'),
        (MODE_CS_SCAN, 'MODE_CS_SCAN'),
        (MODE_COMMENT, 'MODE_COMMENT'),
        (MODE_MATH, 'MODE_MATH'),
        (MODE_DMATH, 'MODE_DMATH'),
        (MODE_GOBBLEWHITE, 'MODE_GOBBLEWHITE'),
    )

    def init(self, arg):
        """Store *arg* as the mode and return self.

        Raises ValueError when *arg* is not one of the_modes.
        """
        if arg in the_modes:
            self.mode = arg
            return self
        raise ValueError('mode not in the_modes')

    def __cmp__(self, other):
        """Order by mode value; an *other* of a different type is first
        wrapped with mode()."""
        if type(other) != type(self):
            other = mode(other)
        return cmp(self.mode, other.mode)

    def __repr__(self):
        """Return the symbolic name; raise ValueError for an unknown mode."""
        for value, name in self._names:
            if self.mode == value:
                return name
        raise ValueError('mode not in the_modes')
85 # just a wrapper around a class initialisation
def mode(arg):
    """Build a Mode instance for *arg* (Mode.init raises ValueError if
    *arg* is not a known mode)."""
    instance = Mode()
    return instance.init(arg)
# After phase 1, the text consists of chunks, each with a certain type.
# This type is assigned to the chtype member of the chunk,
# the where field contains the file position where the chunk was found,
# and the data field contains either (1) a tuple describing the start and end
# positions of the substring (usable as a slice of the buf variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks.
# Chunk types, numbered consecutively from zero.
(
    PLAIN,          # ASSUME PLAINTEXT, data = the text
    GROUP,          # GROUP ({}), data = [chunk, chunk,..]
    CSNAME,         # CONTROL SEQ TOKEN, data = the command
    COMMENT,        # data is the actual comment
    DMATH,          # DISPLAYMATH, data = [chunk, chunk,..]
    MATH,           # MATH, see DISPLAYMATH
    OTHER,          # CHAR WITH CATCODE OTHER, data = char
    ACTIVE,         # ACTIVE CHAR
    GOBBLEDWHITE,   # Gobbled LWSP, after CSNAME
    ENDLINE,        # END-OF-LINE, data = '\n'
    DENDLINE,       # DOUBLE EOL, data='\n', indicates \par
    ENV,            # LaTeX-environment, data = (envname,[ch,ch,ch,.])
    CSLINE,         # for texi: next chunk will be one group of args.
                    # Will be set all on 1 line
    IGNORE,         # IGNORE this data
    ENDENV,         # TEMP END OF GROUP INDICATOR
    IF,             # IF-directive, data = (flag,negate,[ch, ch, ch,...])
) = range(16)

# Every valid chunk type; ChunkType.init() validates against this tuple.
the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
             GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF)
119 # class, just to display symbolic name
class ChunkType:
    """Symbolic wrapper around one of the chunk-type constants.

    Instances compare by the wrapped constant and print as its name.
    """

    # (constant, printable name) pairs consulted by __repr__
    _names = (
        (PLAIN, 'PLAIN'),
        (GROUP, 'GROUP'),
        (CSNAME, 'CSNAME'),
        (COMMENT, 'COMMENT'),
        (DMATH, 'DMATH'),
        (MATH, 'MATH'),
        (OTHER, 'OTHER'),
        (ACTIVE, 'ACTIVE'),
        (GOBBLEDWHITE, 'GOBBLEDWHITE'),
        (DENDLINE, 'DENDLINE'),
        (ENDLINE, 'ENDLINE'),
        (ENV, 'ENV'),
        (CSLINE, 'CSLINE'),
        (IGNORE, 'IGNORE'),
        (ENDENV, 'ENDENV'),
        (IF, 'IF'),
    )

    def init(self, chunk_type):
        """Store *chunk_type* and return self.

        Raises ValueError when *chunk_type* is not one of the_types.  The
        original raised the *string* 'ValueError' here, which an
        ``except ValueError`` clause cannot catch and which differs from
        both Mode.init and __repr__ below.
        """
        if chunk_type not in the_types:
            raise ValueError('chunk_type not in the_types')
        self.chunk_type = chunk_type
        return self

    def __cmp__(self, other):
        """Order by chunk-type value; an *other* of a different type is
        first wrapped with chunk_type()."""
        if type(self) != type(other):
            other = chunk_type(other)
        return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
        """Return the symbolic name; raise ValueError for an unknown type."""
        for value, name in self._names:
            if self.chunk_type == value:
                return name
        raise ValueError('chunk_type not in the_types')
168 # ...and the wrapper
def chunk_type(type):
    """Build a ChunkType instance for the chunk-type constant *type*."""
    instance = ChunkType()
    return instance.init(type)
172 # store a type object of the ChunkType-class-instance...
# Computed once at import time; Chunk.init compares type(chtype) against
# this to decide whether its argument still has to be wrapped.
chunk_type_type = type(chunk_type(0))
175 # this class contains a part of the parsed buffer
class Chunk:
    """One piece of the parsed buffer: a type, a position and its data."""

    def init(self, chtype, where, data):
        """Fill in the chunk and return self.

        chtype -- a ChunkType instance, or a raw constant which is wrapped
        where  -- integer buffer position; TypeError is raised otherwise
        data   -- stored as-is
        """
        if type(chtype) != chunk_type_type:
            chtype = chunk_type(chtype)
        self.chtype = chtype
        if type(where) != type(0):
            raise TypeError("'where' is not a number")
        self.where, self.data = where, data
        ##print 'CHUNK', self
        return self

    def __repr__(self):
        """Show the chunk as 'chunk(chtype, where, data)'."""
        return 'chunk' + repr((self.chtype, self.where, self.data))
191 # and the wrapper
def chunk(chtype, where, data):
    """Build a Chunk; see Chunk.init for the meaning of the arguments."""
    instance = Chunk()
    return instance.init(chtype, where, data)
# String exception raised throughout this file for malformed input
# (used as ``raise error, 'message'``).
error = 'partparse.error'
200 # TeX's catcodes...
# TeX's catcodes; each value doubles as an index into a catcode table
# and into cc_names below.
CC_ESCAPE = 0
CC_LBRACE = 1
CC_RBRACE = 2
CC_MATHSHIFT = 3
CC_ALIGNMENT = 4
CC_ENDLINE = 5
CC_PARAMETER = 6
CC_SUPERSCRIPT = 7
CC_SUBSCRIPT = 8
CC_IGNORE = 9
CC_WHITE = 10
CC_LETTER = 11
CC_OTHER = 12
CC_ACTIVE = 13
CC_COMMENT = 14
CC_INVALID = 15

# and the names, indexed by catcode value
cc_names = [
    'CC_ESCAPE',
    'CC_LBRACE',
    'CC_RBRACE',
    'CC_MATHSHIFT',
    'CC_ALIGNMENT',
    'CC_ENDLINE',
    'CC_PARAMETER',
    'CC_SUPERSCRIPT',
    'CC_SUBSCRIPT',
    'CC_IGNORE',
    'CC_WHITE',
    'CC_LETTER',
    'CC_OTHER',
    'CC_ACTIVE',
    'CC_COMMENT',
    'CC_INVALID',
]   # the closing bracket was missing, leaving the list literal unterminated
239 # Show a list of catcode-name-symbols
def pcl(codelist):
    """Return the catcode names for *codelist* as '[NAME, NAME, ...]'."""
    names = [cc_names[code] for code in codelist]
    return '[' + ', '.join(names) + ']'
246 # the name of the catcode (ACTIVE, OTHER, etc.)
def pc(code):
    """Return the symbolic name of catcode *code*."""
    name = cc_names[code]
    return name
251 # Which catcodes make the parser stop parsing regular plaintext
# Which catcodes make the parser stop parsing regular plaintext.
# The order is kept: it determines the order of characters in the
# character class built by make_rc_regular().
regular_stopcodes = [
    CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
    CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
    CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE,
]

# same for scanning a control sequence name
csname_scancodes = [CC_LETTER]

# same for gobbling LWSP
white_scancodes = [CC_WHITE]
##white_scancodes = [CC_WHITE, CC_ENDLINE]

# make a list of all catcode id's, except for catcode ``other''
all_but_other_codes = [code for code in range(16) if code != CC_OTHER]
##print all_but_other_codes

# when does a comment end
comment_stopcodes = [CC_ENDLINE]
271 # gather all characters together, specified by a list of catcodes
def code2string(cc, codelist):
    """Concatenate the characters of every catcode listed in *codelist*.

    *cc* is a catcode table indexed by catcode; empty or None entries are
    skipped.  The result follows the order of *codelist*.
    """
    pieces = [cc[category] for category in codelist if cc[category]]
    return ''.join(pieces)
281 # automatically generate all characters of catcode other, being the
282 # complement set in the ASCII range (128 characters)
def make_other_codes(cc):
    """Return every character 0..255 that no non-``other`` catcode claims.

    *cc* is a catcode table; the categories listed in all_but_other_codes
    are consulted and the characters they contain are left out.
    """
    taken = {}
    for category in all_but_other_codes:
        members = cc[category]
        if members:
            for c in members:
                taken[ord(c)] = 1
    return ''.join([chr(n) for n in range(256) if n not in taken])
295 # catcode dump (which characters have which catcodes).
def dump_cc(name, cc):
    """Sanity-check the catcode table *cc*.

    The actual dump is commented out; all that remains is the check that
    the table has 16 entries, otherwise TypeError is raised.
    """
    ##print '\t' + name
    ##print '=' * (8+len(name))
    if len(cc) == 16:
        return
    raise TypeError('cc not good cat class')
    ## for i in range(16):
    ##     print pc(i) + '\t' + `cc[i]`
305 # In the beginning,....
# In the beginning no catcode has any characters.
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)


# INITEX
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE] = '\n'
initex_cc[CC_IGNORE] = '\0'
initex_cc[CC_WHITE] = ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT] = '%'
initex_cc[CC_INVALID] = '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE] = '{'
lplain_cc[CC_RBRACE] = '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)


# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:]   # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'         # add it to OTHER list
dump_cc('my_cc', my_cc)
344 # needed for un_re, my equivalent for regexp-quote in Emacs
# Characters that carry a meaning in the (Emacs-style) regex syntax and
# therefore must be quoted.  '.', '*', '+' and '?' were missing, so a
# \verb argument delimited by '.' compiled to a match-anything pattern.
re_meaning = '\\[]^$.*+?'

def un_re(str):
    """Quote *str* so it matches literally as a regular expression.

    Every character listed in re_meaning is preceded by a backslash; all
    other characters are copied unchanged.  (Only these characters are
    quoted because a backslash before most other characters *adds* a
    meaning in this regex syntax.)
    """
    quoted = []
    for char in str:
        if char in re_meaning:
            quoted.append('\\')
        quoted.append(char)
    return ''.join(quoted)
355 # NOTE the negate ('^') operator in *some* of the regexps below
def make_rc_regular(cc):
    """Compile a pattern matching any character that ends regular text
    under catcode table *cc*."""
    # problems here if '[]' are included!!
    stop_chars = code2string(cc, regular_stopcodes)
    return regex.compile('[' + stop_chars + ']')
def make_rc_cs_scan(cc):
    """Compile a pattern matching the first character that can NOT be
    part of a control sequence name under catcode table *cc*."""
    name_chars = code2string(cc, csname_scancodes)
    return regex.compile('[^' + name_chars + ']')
def make_rc_comment(cc):
    """Compile a pattern matching any character that ends a comment under
    catcode table *cc*."""
    stop_chars = code2string(cc, comment_stopcodes)
    return regex.compile('[' + stop_chars + ']')
def make_rc_endwhite(cc):
    """Compile a pattern matching the first character that is NOT white
    space under catcode table *cc*."""
    white_chars = code2string(cc, white_scancodes)
    return regex.compile('[^' + white_chars + ']')
371 # regular: normal mode:
# The compiled scanners used by parseit(), all built from my_cc.
# regular: normal mode; finds the next character that ends plain text.
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
rc_cs_scan = make_rc_cs_scan(my_cc)
# comment: finds the character that ends a comment
rc_comment = make_rc_comment(my_cc)
# endwhite: finds the first character that is not white space
rc_endwhite = make_rc_endwhite(my_cc)
# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
# RECURSION-LEVEL is incremented on entry.
# result contains the list of chunks returned;
# together with this list, the buffer position is returned.
#
# RECURSION-LEVEL is set to zero *again* when a {,D}MATH-mode scan is
# entered recursively.
# This has been done in order to better check for environment mismatches.
389 def parseit(buf, *rest):
390 global lineno
392 if len(rest) == 3:
393 parsemode, start, lvl = rest
394 elif len(rest) == 2:
395 parsemode, start, lvl = rest + (0, )
396 elif len(rest) == 1:
397 parsemode, start, lvl = rest + (0, 0)
398 elif len(rest) == 0:
399 parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
400 else:
401 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
402 result = []
403 end = len(buf)
404 if lvl == 0 and parsemode == mode(MODE_REGULAR):
405 lineno = 1
406 lvl = lvl + 1
408 ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
411 # some of the more regular modes...
414 if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
415 cstate = []
416 newpos = start
417 curpmode = parsemode
418 while 1:
419 where = newpos
420 #print '\tnew round: ' + epsilon(buf, where)
421 if where == end:
422 if lvl > 1 or curpmode != mode(MODE_REGULAR):
423 # not the way we started...
424 raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
425 # the real ending of lvl-1 parse
426 return end, result
428 pos = rc_regular.search(buf, where)
430 if pos < 0:
431 pos = end
433 if pos != where:
434 newpos, c = pos, chunk(PLAIN, where, (where, pos))
435 result.append(c)
436 continue
440 # ok, pos == where and pos != end
442 foundchar = buf[where]
443 if foundchar in my_cc[CC_LBRACE]:
444 # recursive subgroup parse...
445 newpos, data = parseit(buf, curpmode, where+1, lvl)
446 result.append(chunk(GROUP, where, data))
448 elif foundchar in my_cc[CC_RBRACE]:
449 if lvl <= 1:
450 raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
451 if lvl == 1 and mode != mode(MODE_REGULAR):
452 raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
453 return where + 1, result
455 elif foundchar in my_cc[CC_ESCAPE]:
457 # call the routine that actually deals with
458 # this problem. If do_ret is None, than
459 # return the value of do_ret
461 # Note that handle_cs might call this routine
462 # recursively again...
464 do_ret, newpos = handlecs(buf, where, \
465 curpmode, lvl, result, end)
466 if do_ret != None:
467 return do_ret
469 elif foundchar in my_cc[CC_COMMENT]:
470 newpos, data = parseit(buf, \
471 mode(MODE_COMMENT), where+1, lvl)
472 result.append(chunk(COMMENT, where, data))
474 elif foundchar in my_cc[CC_MATHSHIFT]:
475 # note that recursive calls to math-mode
476 # scanning are called with recursion-level 0
477 # again, in order to check for bad mathend
479 if where + 1 != end and \
480 buf[where + 1] in \
481 my_cc[CC_MATHSHIFT]:
483 # double mathshift, e.g. '$$'
485 if curpmode == mode(MODE_REGULAR):
486 newpos, data = parseit(buf, \
487 mode(MODE_DMATH), \
488 where+2, 0)
489 result.append(chunk(DMATH, \
490 where, data))
491 elif curpmode == mode(MODE_MATH):
492 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
493 elif lvl != 1:
494 raise error, 'bad mathend.' + \
495 lle(lvl, buf, where)
496 else:
497 return where + 2, result
498 else:
500 # single math shift, e.g. '$'
502 if curpmode == mode(MODE_REGULAR):
503 newpos, data = parseit(buf, \
504 mode(MODE_MATH), \
505 where+1, 0)
506 result.append(chunk(MATH, \
507 where, data))
508 elif curpmode == mode(MODE_DMATH):
509 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
510 elif lvl != 1:
511 raise error, 'bad mathend.' + \
512 lv(lvl, buf, where)
513 else:
514 return where + 1, result
516 elif foundchar in my_cc[CC_IGNORE]:
517 print 'warning: ignored char', `foundchar`
518 newpos = where + 1
520 elif foundchar in my_cc[CC_ACTIVE]:
521 result.append(chunk(ACTIVE, where, foundchar))
522 newpos = where + 1
524 elif foundchar in my_cc[CC_INVALID]:
525 raise error, 'invalid char ' + `foundchar`
526 newpos = where + 1
528 elif foundchar in my_cc[CC_ENDLINE]:
530 # after an end of line, eat the rest of
531 # whitespace on the beginning of the next line
532 # this is what LaTeX more or less does
534 # also, try to indicate double newlines (\par)
536 lineno = lineno + 1
537 savedwhere = where
538 newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
539 if newpos != end and buf[newpos] in \
540 my_cc[CC_ENDLINE]:
541 result.append(chunk(DENDLINE, \
542 savedwhere, foundchar))
543 else:
544 result.append(chunk(ENDLINE, \
545 savedwhere, foundchar))
546 else:
547 result.append(chunk(OTHER, where, foundchar))
548 newpos = where + 1
550 elif parsemode == mode(MODE_CS_SCAN):
552 # scan for a control sequence token. `\ape', `\nut' or `\%'
554 if start == end:
555 raise EOFError, 'can\'t find end of csname'
556 pos = rc_cs_scan.search(buf, start)
557 if pos < 0:
558 pos = end
559 if pos == start:
560 # first non-letter right where we started the search
561 # ---> the control sequence name consists of one single
562 # character. Also: don't eat white space...
563 if buf[pos] in my_cc[CC_ENDLINE]:
564 lineno = lineno + 1
565 pos = pos + 1
566 return pos, (start, pos)
567 else:
568 spos = pos
569 if buf[pos] == '\n':
570 lineno = lineno + 1
571 spos = pos + 1
572 pos2, dummy = parseit(buf, \
573 mode(MODE_GOBBLEWHITE), spos, lvl)
574 return pos2, (start, pos)
576 elif parsemode == mode(MODE_GOBBLEWHITE):
577 if start == end:
578 return start, ''
579 pos = rc_endwhite.search(buf, start)
580 if pos < 0:
581 pos = start
582 return pos, (start, pos)
584 elif parsemode == mode(MODE_COMMENT):
585 pos = rc_comment.search(buf, start)
586 lineno = lineno + 1
587 if pos < 0:
588 print 'no newline perhaps?'
589 raise EOFError, 'can\'t find end of comment'
590 pos = pos + 1
591 pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
592 return pos2, (start, pos)
595 else:
596 raise error, 'Unknown mode (' + `parsemode` + ')'
599 #moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
601 #boxcommands = 'mbox', 'fbox'
602 #defcommands = 'def', 'newcommand'
# The literal text that ends a verbatim environment; handlecs() searches
# for it and skips len(endverbstr) characters past the match.
endverbstr = '\\end{verbatim}'

# endverbstr quoted with un_re() and compiled for searching
re_endverb = regex.compile(un_re(endverbstr))
609 # handlecs: helper function for parseit, for the special thing we might
610 # wanna do after certain command control sequences
611 # returns: None or return_data, newpos
613 # in the latter case, the calling function is instructed to immediately
614 # return with the data in return_data
616 def handlecs(buf, where, curpmode, lvl, result, end):
617 global lineno
619 # get the control sequence name...
620 newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
621 saveddata = data
623 if s(buf, data) in ('begin', 'end'):
624 # skip the expected '{' and get the LaTeX-envname '}'
625 newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
626 if len(data) != 1:
627 raise error, 'expected 1 chunk of data.' + \
628 lle(lvl, buf, where)
630 # yucky, we've got an environment
631 envname = s(buf, data[0].data)
632 ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
633 if s(buf, saveddata) == 'begin' and envname == 'verbatim':
634 # verbatim deserves special treatment
635 pos = re_endverb.search(buf, newpos)
636 if pos < 0:
637 raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
638 result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
639 newpos = pos + len(endverbstr)
641 elif s(buf, saveddata) == 'begin':
642 # start parsing recursively... If that parse returns
643 # from an '\end{...}', then should the last item of
644 # the returned data be a string containing the ended
645 # environment
646 newpos, data = parseit(buf, curpmode, newpos, lvl)
647 if not data or type(data[-1]) != type(''):
648 raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
649 retenv = data[-1]
650 del data[-1]
651 if retenv != envname:
652 #[`retenv`, `envname`]
653 raise error, 'environments do not match.' + \
654 lle(lvl, buf, where) + \
655 epsilon(buf, newpos)
656 result.append(chunk(ENV, where, (retenv, data)))
657 else:
658 # 'end'... append the environment name, as just
659 # pointed out, and order parsit to return...
660 result.append(envname)
661 ##print 'POINT of return: ' + epsilon(buf, newpos)
662 # the tuple will be returned by parseit
663 return (newpos, result), newpos
665 # end of \begin ... \end handling
667 elif s(buf, data)[0:2] == 'if':
668 # another scary monster: the 'if' directive
669 flag = s(buf, data)[2:]
671 # recursively call parseit, just like environment above..
672 # the last item of data should contain the if-termination
673 # e.g., 'else' of 'fi'
674 newpos, data = parseit(buf, curpmode, newpos, lvl)
675 if not data or data[-1] not in ('else', 'fi'):
676 raise error, 'wrong if... termination' + \
677 lle(lvl, buf, where) + epsilon(buf, newpos)
679 ifterm = data[-1]
680 del data[-1]
681 # 0 means dont_negate flag
682 result.append(chunk(IF, where, (flag, 0, data)))
683 if ifterm == 'else':
684 # do the whole thing again, there is only one way
685 # to end this one, by 'fi'
686 newpos, data = parseit(buf, curpmode, newpos, lvl)
687 if not data or data[-1] not in ('fi', ):
688 raise error, 'wrong if...else... termination' \
689 + lle(lvl, buf, where) \
690 + epsilon(buf, newpos)
692 ifterm = data[-1]
693 del data[-1]
694 result.append(chunk(IF, where, (flag, 1, data)))
695 #done implicitely: return None, newpos
697 elif s(buf, data) in ('else', 'fi'):
698 result.append(s(buf, data))
699 # order calling party to return tuple
700 return (newpos, result), newpos
702 # end of \if, \else, ... \fi handling
704 elif s(buf, saveddata) == 'verb':
705 x2 = saveddata[1]
706 result.append(chunk(CSNAME, where, data))
707 if x2 == end:
708 raise error, 'premature end of command.' + lle(lvl, buf, where)
709 delimchar = buf[x2]
710 ##print 'VERB: delimchar ' + `delimchar`
711 pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
712 if pos < 0:
713 raise error, 'end of \'verb\' argument (' + \
714 `delimchar` + ') not found.' + \
715 lle(lvl, buf, where)
716 result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
717 newpos = pos + 1
718 else:
719 result.append(chunk(CSNAME, where, data))
720 return None, newpos
722 # this is just a function to get the string value if the possible data-tuple
def s(buf, data):
    """Return the string value of a chunk's data field.

    A string is returned as it is; a tuple of two integers is used as a
    slice into *buf*.  Anything else raises TypeError.
    """
    if type(data) == type(''):
        return data
    is_int_pair = (len(data) == 2
                   and type(data[0]) == type(0)
                   and type(data[1]) == type(0))
    if not is_int_pair:
        raise TypeError('expected tuple of 2 integers')
    first, last = data
    return buf[first:last]
732 ##length, data1, i = getnextarg(length, buf, pp, i + 1)
734 # make a deep-copy of some chunks
def crcopy(r):
    """Return a new list holding a chunkcopy() of every chunk in *r*."""
    return [chunkcopy(item) for item in r]
743 # copy a chunk, would better be a method of class Chunk...
def chunkcopy(ch):
    """Return a copy of chunk *ch*.

    The members of a GROUP chunk are copied recursively; for every other
    chunk type the new chunk shares the data of the original.
    """
    if ch.chtype != chunk_type(GROUP):
        return chunk(ch.chtype, ch.where, ch.data)
    members = [chunkcopy(member) for member in ch.data]
    return chunk(GROUP, ch.where, members)
754 # get next argument for TeX-macro, flatten a group (insert between)
755 # or return Command Sequence token, or give back one character
756 def getnextarg(length, buf, pp, item):
758 ##wobj = Wobj().init()
759 ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
760 ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
762 while item < length and pp[item].chtype == chunk_type(ENDLINE):
763 del pp[item]
764 length = length - 1
765 if item >= length:
766 raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
767 if pp[item].chtype == chunk_type(GROUP):
768 newpp = pp[item].data
769 del pp[item]
770 length = length - 1
771 changeit(buf, newpp)
772 length = length + len(newpp)
773 pp[item:item] = newpp
774 item = item + len(newpp)
775 if len(newpp) < 10:
776 wobj = Wobj().init()
777 dumpit(buf, wobj.write, newpp)
778 ##print 'GETNEXTARG: inserted ' + `wobj.data`
779 return length, item
780 elif pp[item].chtype == chunk_type(PLAIN):
781 #grab one char
782 print 'WARNING: grabbing one char'
783 if len(s(buf, pp[item].data)) > 1:
784 pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
785 item, length = item+1, length+1
786 pp[item].data = s(buf, pp[item].data)[1:]
787 else:
788 item = item+1
789 return length, item
790 else:
791 try:
792 str = `s(buf, ch.data)`
793 except TypeError:
794 str = `ch.data`
795 if len(str) > 400:
796 str = str[:400] + '...'
797 print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
798 return length, item
801 # this one is needed to find the end of LaTeX's optional argument, like
802 # item[...]
# matches the ']' that closes an optional argument; used by getoptarg()
re_endopt = regex.compile(']')
805 # get a LaTeX-optional argument, you know, the square braces '[' and ']'
806 def getoptarg(length, buf, pp, item):
808 wobj = Wobj().init()
809 dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
810 ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
812 if item >= length or \
813 pp[item].chtype != chunk_type(PLAIN) or \
814 s(buf, pp[item].data)[0] != '[':
815 return length, item
817 pp[item].data = s(buf, pp[item].data)[1:]
818 if len(pp[item].data) == 0:
819 del pp[item]
820 length = length-1
822 while 1:
823 if item == length:
824 raise error, 'No end of optional arg found'
825 if pp[item].chtype == chunk_type(PLAIN):
826 text = s(buf, pp[item].data)
827 pos = re_endopt.search(text)
828 if pos >= 0:
829 pp[item].data = text[:pos]
830 if pos == 0:
831 del pp[item]
832 length = length-1
833 else:
834 item=item+1
835 text = text[pos+1:]
837 while text and text[0] in ' \t':
838 text = text[1:]
840 if text:
841 pp.insert(item, chunk(PLAIN, 0, text))
842 length = length + 1
843 return length, item
845 item = item+1
848 # Wobj just add write-requests to the ``data'' attribute
class Wobj:
    """Write-only sink: everything written is appended to ``data``."""

    def init(self):
        """Start with empty data and return self."""
        self.data = ''
        return self

    def write(self, data):
        """Append *data* to what has been collected so far."""
        self.data += data
856 # ignore these commands
# ignore these commands (flattext() deletes them without a warning)
ignoredcommands = ('bcode', 'ecode', 'optional')
# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF')
# \{ --> {, \} --> }, etc
themselves = ('{', '}', '.', '@') + wordsselves
# these ones also themselves (see argargs macro in myformat.sty);
# flattext() only honours them while hist.inargs is set
inargsselves = (',', '[', ']', '(', ')')
# this is how *I* would show the difference between emph and strong
# a tuple holds the text put before and after the argument;
# code 1 means: fold to uppercase
markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), \
	    'strong': ('*', '*')}

# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}

# transparent for these commands
for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp', \
	    'r', 'i', 't')
877 # try to remove macros and return flat text
878 def flattext(buf, pp):
879 pp = crcopy(pp)
880 ##print '---> FLATTEXT ' + `pp`
881 wobj = Wobj().init()
883 i, length = 0, len(pp)
884 while 1:
885 if len(pp) != length:
886 raise 'FATAL', 'inconsistent length'
887 if i >= length:
888 break
889 ch = pp[i]
890 i = i+1
891 if ch.chtype == chunk_type(PLAIN):
892 pass
893 elif ch.chtype == chunk_type(CSNAME):
894 if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
895 ch.chtype = chunk_type(PLAIN)
896 elif s(buf, ch.data) == 'e':
897 ch.chtype = chunk_type(PLAIN)
898 ch.data = '\\'
899 elif len(s(buf, ch.data)) == 1 \
900 and s(buf, ch.data) in onlylatexspecial:
901 ch.chtype = chunk_type(PLAIN)
902 # if it is followed by an empty group,
903 # remove that group, it was needed for
904 # a true space
905 if i < length \
906 and pp[i].chtype==chunk_type(GROUP) \
907 and len(pp[i].data) == 0:
908 del pp[i]
909 length = length-1
911 elif s(buf, ch.data) in markcmds.keys():
912 length, newi = getnextarg(length, buf, pp, i)
913 str = flattext(buf, pp[i:newi])
914 del pp[i:newi]
915 length = length - (newi - i)
916 ch.chtype = chunk_type(PLAIN)
917 markcmd = s(buf, ch.data)
918 x = markcmds[markcmd]
919 if type(x) == type(()):
920 pre, after = x
921 str = pre+str+after
922 elif x == 1:
923 str = string.upper(str)
924 else:
925 raise 'FATAL', 'corrupt markcmds'
926 ch.data = str
927 else:
928 if s(buf, ch.data) not in ignoredcommands:
929 print 'WARNING: deleting command ' + `s(buf, ch.data)`
930 print 'PP' + `pp[i-1]`
931 del pp[i-1]
932 i, length = i-1, length-1
933 elif ch.chtype == chunk_type(GROUP):
934 length, newi = getnextarg(length, buf, pp, i-1)
935 i = i-1
936 ## str = flattext(buf, crcopy(pp[i-1:newi]))
937 ## del pp[i:newi]
938 ## length = length - (newi - i)
939 ## ch.chtype = chunk_type(PLAIN)
940 ## ch.data = str
941 else:
942 pass
944 dumpit(buf, wobj.write, pp)
945 ##print 'FLATTEXT: RETURNING ' + `wobj.data`
946 return wobj.data
948 # try to generate node names (a bit shorter than the chapter title)
949 # note that the \nodename command (see elsewhere) overules these efforts
def invent_node_names(text):
    """Derive a (shorter) node name from the section title *text*.

    'Built-in X'      -> 'X'  (unless X is 'modules' or 'functions')
    'A module B'      -> 'B'
    'A object B'      -> 'A object'
    '... methods and data attributes' (more than four words)
                      -> the first two words
    Anything else is returned unchanged.  Matching ignores case.
    """
    words = text.split()
    count = len(words)
    ##print 'WORDS ' + `words`

    if count == 2:
        if words[0].lower() == 'built-in' \
           and words[1].lower() not in ('modules', 'functions'):
            return words[1]
    elif count == 3:
        middle = words[1].lower()
        if middle == 'module':
            return words[2]
        if middle == 'object':
            return ' '.join(words[0:2])
    elif count > 4:
        tail = ' '.join(words[-4:]).lower()
        if tail == 'methods and data attributes':
            return ' '.join(words[:2])
    return text
# matches a comma, a quote character, '@' or a brace
re_commas_etc = regex.compile('[,`\'@{}]')

# matches a (possibly empty) run of blanks and tabs; next_command_p()
# uses it to recognise text that is white space only
re_whitespace = regex.compile('[ \t]*')
973 ##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
975 # look if the next non-white stuff is also a command, resulting in skipping
976 # double endlines (DENDLINE) too, and thus omitting \par's
977 # Sometimes this is too much, maybe consider DENDLINE's as stop
def next_command_p(length, buf, pp, i, cmdname):
    """Tell whether the next non-white chunk from pp[i] on is \\cmdname.

    ENDLINE and DENDLINE chunks and PLAIN chunks consisting of blanks
    and tabs only are skipped.  Returns the index just _after_ the
    command when it is found and -1 otherwise.  *length* is not used.
    """
    while i < len(pp):
        ch = pp[i]
        i = i+1
        kind = ch.chtype
        if kind == chunk_type(ENDLINE) or kind == chunk_type(DENDLINE):
            continue
        if kind == chunk_type(PLAIN):
            text = s(buf, ch.data)
            if re_whitespace.search(text) == 0 and \
               re_whitespace.match(text) == len(text):
                continue
            return -1
        if kind == chunk_type(CSNAME) and s(buf, ch.data) == cmdname:
            return i    # _after_ the command
        return -1
    return -1
1001 # things that are special to LaTeX, but not to texi..
# flattext() turns a one-character control sequence into plain text
# when the character is one of these
onlylatexspecial = '_~^$#&%'

# an empty class; its instances are used as plain attribute holders
class Struct: pass

# module-wide state; startchange() fills in the attributes
hist = Struct()
out = Struct()
def startchange():
    """Reset the attributes of the module-level ``hist`` and ``out``
    objects to their initial (empty) values."""
    # only attributes are assigned, so no ``global`` statement is needed
    hist.inenv = []
    hist.nodenames = []
    hist.cindex = []
    hist.inargs = 0
    hist.enumeratenesting = 0
    hist.itemizenesting = 0

    out.doublenodes = []
    out.doublecindeces = []
# ready-made one-chunk lists: a blank, a comma plus blank, and a
# 'cindex' command line
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
enumeratesymbols = ['1', 'A', 'a']
1033 ## \begin{ {func,data,exc}desc }{name}...
1034 ## the resulting texi-code is dependent on the contents of indexsubitem
1037 # indexsubitem: `['XXX', 'function']
1038 # funcdesc:
1039 # deffn {`idxsi`} NAME (FUNCARGS)
1041 # indexsubitem: `['XXX', 'method']`
1042 # funcdesc:
1043 # defmethod {`idxsi[0]`} NAME (FUNCARGS)
1045 # indexsubitem: `['in', 'module', 'MODNAME']'
1046 # datadesc:
1047 # defcv data {`idxsi[1:]`} NAME
1048 # excdesc:
1049 # defcv exception {`idxsi[1:]`} NAME
1050 # funcdesc:
1051 # deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
1053 # indexsubitem: `['OBJECT', 'attribute']'
1054 # datadesc
1055 # defcv attribute {`OBJECT`} NAME
1058 ## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1059 ## or \funcline{NAME}{ARGS}
1061 def do_funcdesc(length, buf, pp, i):
1062 startpoint = i-1
1063 ch = pp[startpoint]
1064 wh = ch.where
1065 length, newi = getnextarg(length, buf, pp, i)
1066 funcname = chunk(GROUP, wh, pp[i:newi])
1067 del pp[i:newi]
1068 length = length - (newi-i)
1069 save = hist.inargs
1070 hist.inargs = 1
1071 length, newi = getnextarg(length, buf, pp, i)
1072 hist.inargs = save
1073 del save
1074 the_args = [chunk(PLAIN, wh, '()'[0])] + \
1075 pp[i:newi] + \
1076 [chunk(PLAIN, wh, '()'[1])]
1077 del pp[i:newi]
1078 length = length - (newi-i)
1080 idxsi = hist.indexsubitem # words
1081 command = ''
1082 cat_class = ''
1083 if idxsi and idxsi[-1] in ('method', 'attribute'):
1084 command = 'defmethod'
1085 cat_class = string.join(idxsi[:-1])
1086 elif len(idxsi) == 2 and idxsi[1] == 'function':
1087 command = 'deffn'
1088 cat_class = string.join(idxsi)
1089 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1090 command = 'deffn'
1091 cat_class = 'function of ' + string.join(idxsi[1:])
1093 if not command:
1094 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1096 ch.chtype = chunk_type(CSLINE)
1097 ch.data = command
1099 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1100 cslinearg.append(chunk(PLAIN, wh, ' '))
1101 cslinearg.append(funcname)
1102 cslinearg.append(chunk(PLAIN, wh, ' '))
1103 l = len(cslinearg)
1104 cslinearg[l:l] = the_args
1106 pp.insert(i, chunk(GROUP, wh, cslinearg))
1107 i, length = i+1, length+1
1108 hist.command = command
1109 return length, i
1112 ## this routine will be called on \begin{excdesc}{NAME}
1113 ## or \excline{NAME}
1115 def do_excdesc(length, buf, pp, i):
1116 startpoint = i-1
1117 ch = pp[startpoint]
1118 wh = ch.where
1119 length, newi = getnextarg(length, buf, pp, i)
1120 excname = chunk(GROUP, wh, pp[i:newi])
1121 del pp[i:newi]
1122 length = length - (newi-i)
1124 idxsi = hist.indexsubitem # words
1125 command = ''
1126 cat_class = ''
1127 class_class = ''
1128 if len(idxsi) == 2 and idxsi[1] == 'exception':
1129 command = 'defvr'
1130 cat_class = string.join(idxsi)
1131 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1132 command = 'defcv'
1133 cat_class = 'exception'
1134 class_class = string.join(idxsi[1:])
1135 elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
1136 command = 'defcv'
1137 cat_class = 'exception'
1138 class_class = string.join(idxsi[2:])
1141 if not command:
1142 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1144 ch.chtype = chunk_type(CSLINE)
1145 ch.data = command
1147 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1148 cslinearg.append(chunk(PLAIN, wh, ' '))
1149 if class_class:
1150 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1151 cslinearg.append(chunk(PLAIN, wh, ' '))
1152 cslinearg.append(excname)
1154 pp.insert(i, chunk(GROUP, wh, cslinearg))
1155 i, length = i+1, length+1
1156 hist.command = command
1157 return length, i
1159 ## same for datadesc or dataline...
1160 def do_datadesc(length, buf, pp, i):
1161 startpoint = i-1
1162 ch = pp[startpoint]
1163 wh = ch.where
1164 length, newi = getnextarg(length, buf, pp, i)
1165 dataname = chunk(GROUP, wh, pp[i:newi])
1166 del pp[i:newi]
1167 length = length - (newi-i)
1169 idxsi = hist.indexsubitem # words
1170 command = ''
1171 cat_class = ''
1172 class_class = ''
1173 if idxsi[-1] == 'attribute':
1174 command = 'defcv'
1175 cat_class = 'attribute'
1176 class_class = string.join(idxsi[:-1])
1177 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1178 command = 'defcv'
1179 cat_class = 'data'
1180 class_class = string.join(idxsi[1:])
1181 elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
1182 command = 'defcv'
1183 cat_class = 'data'
1184 class_class = string.join(idxsi[2:])
1187 if not command:
1188 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1190 ch.chtype = chunk_type(CSLINE)
1191 ch.data = command
1193 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1194 cslinearg.append(chunk(PLAIN, wh, ' '))
1195 if class_class:
1196 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1197 cslinearg.append(chunk(PLAIN, wh, ' '))
1198 cslinearg.append(dataname)
1200 pp.insert(i, chunk(GROUP, wh, cslinearg))
1201 i, length = i+1, length+1
1202 hist.command = command
1203 return length, i
# Index commands whose entries are set in the regular (roman) font by
# default, rather than in tt font.
regindices = ('cindex',)
1209 # remove illegal characters from node names
1210 def rm_commas_etc(text):
1211 result = ''
1212 changed = 0
1213 while 1:
1214 pos = re_commas_etc.search(text)
1215 if pos >= 0:
1216 changed = 1
1217 result = result + text[:pos]
1218 text = text[pos+1:]
1219 else:
1220 result = result + text
1221 break
1222 if changed:
1223 print 'Warning: nodename changhed to ' + `result`
1225 return result
# Boolean flags, looked up by name when an \if... chunk is processed.
flags = {
    'texi': 1,
}
1232 ## changeit: the actual routine, that changes the contents of the parsed
1233 ## chunks
1236 def changeit(buf, pp):
1237 global onlylatexspecial, hist, out
1239 i, length = 0, len(pp)
1240 while 1:
1241 # sanity check: length should always equal len(pp)
1242 if len(pp) != length:
1243 raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
1244 if i >= length:
1245 break
1246 ch = pp[i]
1247 i = i + 1
1249 if type(ch) == type(''):
1250 #normally, only chunks are present in pp,
1251 # but in some cases, some extra info
1252 # has been inserted, e.g., the \end{...} clauses
1253 raise 'FATAL', 'got string, probably too many ' + `end`
1255 if ch.chtype == chunk_type(GROUP):
1256 # check for {\em ...} constructs
1257 if ch.data and \
1258 ch.data[0].chtype == chunk_type(CSNAME) and \
1259 s(buf, ch.data[0].data) in fontchanges.keys():
1260 k = s(buf, ch.data[0].data)
1261 del ch.data[0]
1262 pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
1263 length, i = length+1, i+1
1265 # recursively parse the contents of the group
1266 changeit(buf, ch.data)
1268 elif ch.chtype == chunk_type(IF):
1269 # \if...
1270 flag, negate, data = ch.data
1271 ##print 'IF: flag, negate = ' + `flag, negate`
1272 if flag not in flags.keys():
1273 raise error, 'unknown flag ' + `flag`
1275 value = flags[flag]
1276 if negate:
1277 value = (not value)
1278 del pp[i-1]
1279 length, i = length-1, i-1
1280 if value:
1281 pp[i:i] = data
1282 length = length + len(data)
1285 elif ch.chtype == chunk_type(ENV):
1286 # \begin{...} ....
1287 envname, data = ch.data
1289 #push this environment name on stack
1290 hist.inenv.insert(0, envname)
1292 #append an endenv chunk after grouped data
1293 data.append(chunk(ENDENV, ch.where, envname))
1294 ##[`data`]
1296 #delete this object
1297 del pp[i-1]
1298 i, length = i-1, length-1
1300 #insert found data
1301 pp[i:i] = data
1302 length = length + len(data)
1304 if envname == 'verbatim':
1305 pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \
1306 chunk(GROUP, ch.where, [])]
1307 length, i = length+2, i+2
1309 elif envname == 'itemize':
1310 if hist.itemizenesting > len(itemizesymbols):
1311 raise error, 'too deep itemize nesting'
1312 ingroupch = [chunk(CSNAME, ch.where,\
1313 itemizesymbols[hist.itemizenesting])]
1314 hist.itemizenesting = hist.itemizenesting + 1
1315 pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\
1316 chunk(GROUP, ch.where, ingroupch)]
1317 length, i = length+2, i+2
1319 elif envname == 'enumerate':
1320 if hist.enumeratenesting > len(enumeratesymbols):
1321 raise error, 'too deep enumerate nesting'
1322 ingroupch = [chunk(PLAIN, ch.where,\
1323 enumeratesymbols[hist.enumeratenesting])]
1324 hist.enumeratenesting = hist.enumeratenesting + 1
1325 pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\
1326 chunk(GROUP, ch.where, ingroupch)]
1327 length, i = length+2, i+2
1329 elif envname == 'description':
1330 ingroupch = [chunk(CSNAME, ch.where, 'b')]
1331 pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \
1332 chunk(GROUP, ch.where, ingroupch)]
1333 length, i = length+2, i+2
1335 elif envname == 'tableiii':
1336 wh = ch.where
1337 newcode = []
1339 #delete tabular format description
1340 # e.g., {|l|c|l|}
1341 length, newi = getnextarg(length, buf, pp, i)
1342 del pp[i:newi]
1343 length = length - (newi-i)
1345 newcode.append(chunk(CSLINE, wh, 'table'))
1346 ingroupch = [chunk(CSNAME, wh, 'asis')]
1347 newcode.append(chunk(GROUP, wh, ingroupch))
1348 newcode.append(chunk(CSLINE, wh, 'item'))
1350 #get the name of macro for @item
1351 # e.g., {code}
1352 length, newi = getnextarg(length, buf, pp, i)
1354 if newi-i != 1:
1355 raise error, 'Sorry, expected 1 chunk argument'
1356 if pp[i].chtype != chunk_type(PLAIN):
1357 raise error, 'Sorry, expected plain text argument'
1358 hist.itemargmacro = s(buf, pp[i].data)
1359 del pp[i:newi]
1360 length = length - (newi-i)
1362 for count in range(3):
1363 length, newi = getnextarg(length, buf, pp, i)
1364 emphgroup = [\
1365 chunk(CSNAME, wh, 'emph'), \
1366 chunk(GROUP, 0, pp[i:newi])]
1367 del pp[i:newi]
1368 length = length - (newi-i)
1369 if count == 0:
1370 itemarg = emphgroup
1371 elif count == 2:
1372 itembody = itembody + \
1373 [chunk(PLAIN, wh, ' --- ')] + \
1374 emphgroup
1375 else:
1376 itembody = emphgroup
1377 newcode.append(chunk(GROUP, wh, itemarg))
1378 newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
1379 pp[i:i] = newcode
1380 l = len(newcode)
1381 length, i = length+l, i+l
1382 del newcode, l
1384 if length != len(pp):
1385 raise 'STILL, SOMETHING wrong', `i`
1388 elif envname == 'funcdesc':
1389 pp.insert(i, chunk(PLAIN, ch.where, ''))
1390 i, length = i+1, length+1
1391 length, i = do_funcdesc(length, buf, pp, i)
1393 elif envname == 'excdesc':
1394 pp.insert(i, chunk(PLAIN, ch.where, ''))
1395 i, length = i+1, length+1
1396 length, i = do_excdesc(length, buf, pp, i)
1398 elif envname == 'datadesc':
1399 pp.insert(i, chunk(PLAIN, ch.where, ''))
1400 i, length = i+1, length+1
1401 length, i = do_datadesc(length, buf, pp, i)
1403 else:
1404 print 'WARNING: don\'t know what to do with env ' + `envname`
1406 elif ch.chtype == chunk_type(ENDENV):
1407 envname = ch.data
1408 if envname != hist.inenv[0]:
1409 raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
1410 del hist.inenv[0]
1411 del pp[i-1]
1412 i, length = i-1, length-1
1414 if envname == 'verbatim':
1415 pp[i:i] = [\
1416 chunk(CSLINE, ch.where, 'end'), \
1417 chunk(GROUP, ch.where, [\
1418 chunk(PLAIN, ch.where, 'example')])]
1419 i, length = i+2, length+2
1420 elif envname == 'itemize':
1421 hist.itemizenesting = hist.itemizenesting - 1
1422 pp[i:i] = [\
1423 chunk(CSLINE, ch.where, 'end'), \
1424 chunk(GROUP, ch.where, [\
1425 chunk(PLAIN, ch.where, 'itemize')])]
1426 i, length = i+2, length+2
1427 elif envname == 'enumerate':
1428 hist.enumeratenesting = hist.enumeratenesting-1
1429 pp[i:i] = [\
1430 chunk(CSLINE, ch.where, 'end'), \
1431 chunk(GROUP, ch.where, [\
1432 chunk(PLAIN, ch.where, 'enumerate')])]
1433 i, length = i+2, length+2
1434 elif envname == 'description':
1435 pp[i:i] = [\
1436 chunk(CSLINE, ch.where, 'end'), \
1437 chunk(GROUP, ch.where, [\
1438 chunk(PLAIN, ch.where, 'table')])]
1439 i, length = i+2, length+2
1440 elif envname == 'tableiii':
1441 pp[i:i] = [\
1442 chunk(CSLINE, ch.where, 'end'), \
1443 chunk(GROUP, ch.where, [\
1444 chunk(PLAIN, ch.where, 'table')])]
1445 i, length = i+2, length + 2
1446 pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
1447 i, length = i+1, length+1
1449 elif envname in ('funcdesc', 'excdesc', 'datadesc'):
1450 pp[i:i] = [\
1451 chunk(CSLINE, ch.where, 'end'), \
1452 chunk(GROUP, ch.where, [\
1453 chunk(PLAIN, ch.where, hist.command)])]
1454 i, length = i+2, length+2
1455 else:
1456 print 'WARNING: ending env ' + `envname` + 'has no actions'
1458 elif ch.chtype == chunk_type(CSNAME):
1459 # control name transformations
1460 if s(buf, ch.data) in ignoredcommands:
1461 del pp[i-1]
1462 i, length = i-1, length-1
1463 elif s(buf, ch.data) == '@' and \
1464 i != length and \
1465 pp[i].chtype == chunk_type(PLAIN) and \
1466 s(buf, pp[i].data)[0] == '.':
1467 # \@. --> \. --> @.
1468 ch.data = '.'
1469 del pp[i]
1470 length = length-1
1471 elif s(buf, ch.data) == '\\':
1472 # \\ --> \* --> @*
1473 ch.data = '*'
1474 elif len(s(buf, ch.data)) == 1 and \
1475 s(buf, ch.data) in onlylatexspecial:
1476 ch.chtype = chunk_type(PLAIN)
1477 # check if such a command is followed by
1478 # an empty group: e.g., `\%{}'. If so, remove
1479 # this empty group too
1480 if i < length and \
1481 pp[i].chtype == chunk_type(GROUP) \
1482 and len(pp[i].data) == 0:
1483 del pp[i]
1484 length = length-1
1486 elif hist.inargs and s(buf, ch.data) in inargsselves:
1487 # This is the special processing of the
1488 # arguments of the \begin{funcdesc}... or
1489 # \funcline... arguments
1490 # \, --> , \[ --> [, \] --> ]
1491 ch.chtype = chunk_type(PLAIN)
1493 elif s(buf, ch.data) == 'renewcommand':
1494 # \renewcommand{\indexsubitem}....
1495 i, length = i-1, length-1
1496 del pp[i]
1497 length, newi = getnextarg(length, buf, pp, i)
1498 if newi-i == 1 \
1499 and i < length \
1500 and pp[i].chtype == chunk_type(CSNAME) \
1501 and s(buf, pp[i].data) == 'indexsubitem':
1502 del pp[i:newi]
1503 length = length - (newi-i)
1504 length, newi = getnextarg(length, buf, pp, i)
1505 text = flattext(buf, pp[i:newi])
1506 if text[0] != '(' or text[-1] != ')':
1507 raise error, 'expected indexsubitme enclosed in braces'
1508 words = string.split(text[1:-1])
1509 hist.indexsubitem = words
1510 del text, words
1511 else:
1512 print 'WARNING: renewcommand with unsupported arg removed'
1513 del pp[i:newi]
1514 length = length - (newi-i)
1516 elif s(buf, ch.data) == 'item':
1517 ch.chtype = chunk_type(CSLINE)
1518 length, newi = getoptarg(length, buf, pp, i)
1519 ingroupch = pp[i:newi]
1520 del pp[i:newi]
1521 length = length - (newi-i)
1522 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1523 i, length = i+1, length+1
1525 elif s(buf, ch.data) == 'ttindex':
1526 idxsi = hist.indexsubitem
1528 cat_class = ''
1529 if len(idxsi) >= 2 and idxsi[1] in \
1530 ('method', 'function'):
1531 command = 'findex'
1532 elif len(idxsi) >= 2 and idxsi[1] in \
1533 ('exception', 'object'):
1534 command = 'vindex'
1535 else:
1536 print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
1537 command = 'cindex'
1539 if not cat_class:
1540 cat_class = '('+string.join(idxsi)+')'
1542 ch.chtype = chunk_type(CSLINE)
1543 ch.data = command
1545 length, newi = getnextarg(length, buf, pp, i)
1546 arg = pp[i:newi]
1547 del pp[i:newi]
1548 length = length - (newi-i)
1550 cat_arg = [chunk(PLAIN, ch.where, cat_class)]
1552 # determine what should be set in roman, and
1553 # what in tt-font
1554 if command in regindices:
1556 arg = [chunk(CSNAME, ch.where, 't'), \
1557 chunk(GROUP, ch.where, arg)]
1558 else:
1559 cat_arg = [chunk(CSNAME, ch.where, 'r'), \
1560 chunk(GROUP, ch.where, cat_arg)]
1562 ingroupch = arg + \
1563 [chunk(PLAIN, ch.where, ' ')] + \
1564 cat_arg
1566 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1567 length, i = length+1, i+1
1570 elif s(buf, ch.data) == 'ldots':
1571 # \ldots --> \dots{} --> @dots{}
1572 ch.data = 'dots'
1573 if i == length \
1574 or pp[i].chtype != chunk_type(GROUP) \
1575 or pp[i].data != []:
1576 pp.insert(i, chunk(GROUP, ch.where, []))
1577 i, length = i+1, length+1
1578 elif s(buf, ch.data) in wordsselves:
1579 # \UNIX --> UNIX
1580 ch.chtype = chunk_type(PLAIN)
1581 if i != length \
1582 and pp[i].chtype == chunk_type(GROUP) \
1583 and pp[i].data == []:
1584 del pp[i]
1585 length = length-1
1586 elif s(buf, ch.data) in for_texi:
1587 pass
1589 elif s(buf, ch.data) == 'e':
1590 # \e --> \
1591 ch.data = '\\'
1592 ch.chtype = chunk_type(PLAIN)
1593 elif s(buf, ch.data) == 'lineiii':
1594 # This is the most tricky one
1595 # \lineiii{a1}{a2}[{a3}] -->
1596 # @item @<cts. of itemargmacro>{a1}
1597 # a2 [ -- a3]
1599 ##print 'LINEIIIIII!!!!!!!'
1600 ## wobj = Wobj().init()
1601 ## dumpit(buf, wobj.write, pp[i-1:i+5])
1602 ## print '--->' + wobj.data + '<----'
1603 if not hist.inenv:
1604 raise error, \
1605 'no environment for lineiii'
1606 if hist.inenv[0] != 'tableiii':
1607 raise error, \
1608 'wrong command (' + \
1609 `'lineiii'` + \
1610 ') in wrong environment (' \
1611 + `hist.inenv[0]` + ')'
1612 ch.chtype = chunk_type(CSLINE)
1613 ch.data = 'item'
1614 length, newi = getnextarg(length, buf, pp, i)
1615 ingroupch = [chunk(CSNAME, 0, \
1616 hist.itemargmacro), \
1617 chunk(GROUP, 0, pp[i:newi])]
1618 del pp[i:newi]
1619 length = length - (newi-i)
1620 ## print 'ITEM ARG: --->',
1621 ## wobj = Wobj().init()
1622 ## dumpit(buf, wobj.write, ingroupch)
1623 ## print wobj.data, '<---'
1624 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1625 grouppos = i
1626 i, length = i+1, length+1
1627 length, i = getnextarg(length, buf, pp, i)
1628 length, newi = getnextarg(length, buf, pp, i)
1629 if newi > i:
1630 # we have a 3rd arg
1631 pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
1632 i = newi + 1
1633 length = length + 1
1634 ## pp[grouppos].data = pp[grouppos].data \
1635 ## + [chunk(PLAIN, ch.where, ' ')] \
1636 ## + pp[i:newi]
1637 ## del pp[i:newi]
1638 ## length = length - (newi-i)
1639 if length != len(pp):
1640 raise 'IN LINEIII IS THE ERR', `i`
1642 elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
1643 #\xxxsection{A} ---->
1644 # @node A, , ,
1645 # @xxxsection A
1646 ## also: remove commas and quotes
1647 ch.chtype = chunk_type(CSLINE)
1648 length, newi = getnextarg(length, buf, pp, i)
1649 afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
1650 if afternodenamecmd < 0:
1651 cp1 = crcopy(pp[i:newi])
1652 pp[i:newi] = [\
1653 chunk(GROUP, ch.where, \
1654 pp[i:newi])]
1655 length, newi = length - (newi-i) + 1, \
1657 text = flattext(buf, cp1)
1658 text = invent_node_names(text)
1659 else:
1660 length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
1661 cp1 = crcopy(pp[afternodenamecmd:endarg])
1662 del pp[newi:endarg]
1663 length = length - (endarg-newi)
1665 pp[i:newi] = [\
1666 chunk(GROUP, ch.where, \
1667 pp[i:newi])]
1668 length, newi = length - (newi-i) + 1, \
1669 i + 1
1670 text = flattext(buf, cp1)
1671 if text[-1] == '.':
1672 text = text[:-1]
1673 ## print 'FLATTEXT:', `text`
1674 if text in hist.nodenames:
1675 print 'WARNING: node name ' + `text` + ' already used'
1676 out.doublenodes.append(text)
1677 else:
1678 hist.nodenames.append(text)
1679 text = rm_commas_etc(text)
1680 pp[i-1:i-1] = [\
1681 chunk(CSLINE, ch.where, 'node'), \
1682 chunk(GROUP, ch.where, [\
1683 chunk(PLAIN, ch.where, text+', , ,')\
1685 i, length = newi+2, length+2
1687 elif s(buf,ch.data) == 'funcline':
1688 # fold it to a very short environment
1689 pp[i-1:i-1] = [\
1690 chunk(CSLINE, ch.where, 'end'), \
1691 chunk(GROUP, ch.where, [\
1692 chunk(PLAIN, ch.where, hist.command)])]
1693 i, length = i+2, length+2
1694 length, i = do_funcdesc(length, buf, pp, i)
1696 elif s(buf,ch.data) == 'dataline':
1697 pp[i-1:i-1] = [\
1698 chunk(CSLINE, ch.where, 'end'), \
1699 chunk(GROUP, ch.where, [\
1700 chunk(PLAIN, ch.where, hist.command)])]
1701 i, length = i+2, length+2
1702 length, i = do_datadesc(length, buf, pp, i)
1704 elif s(buf,ch.data) == 'excline':
1705 pp[i-1:i-1] = [\
1706 chunk(CSLINE, ch.where, 'end'), \
1707 chunk(GROUP, ch.where, [\
1708 chunk(PLAIN, ch.where, hist.command)])]
1709 i, length = i+2, length+2
1710 length, i = do_excdesc(length, buf, pp, i)
1713 elif s(buf, ch.data) == 'index':
1714 #\index{A} --->
1715 # @cindex A
1716 ch.chtype = chunk_type(CSLINE)
1717 ch.data = 'cindex'
1718 length, newi = getnextarg(length, buf, pp, i)
1720 ingroupch = pp[i:newi]
1721 del pp[i:newi]
1722 length = length - (newi-i)
1723 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1724 length, i = length+1, i+1
1726 elif s(buf, ch.data) == 'bifuncindex':
1727 ch.chtype = chunk_type(CSLINE)
1728 ch.data = 'findex'
1729 length, newi = getnextarg(length, buf, pp, i)
1730 ingroupch = pp[i:newi]
1731 del pp[i:newi]
1732 length = length - (newi-i)
1734 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1735 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1736 ingroupch.append(chunk(GROUP, ch.where, [\
1737 chunk(PLAIN, ch.where, \
1738 '(built-in function)')]))
1740 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1741 length, i = length+1, i+1
1744 elif s(buf, ch.data) == 'opindex':
1745 ch.chtype = chunk_type(CSLINE)
1746 ch.data = 'findex'
1747 length, newi = getnextarg(length, buf, pp, i)
1748 ingroupch = pp[i:newi]
1749 del pp[i:newi]
1750 length = length - (newi-i)
1752 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1753 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1754 ingroupch.append(chunk(GROUP, ch.where, [\
1755 chunk(PLAIN, ch.where, \
1756 '(operator)')]))
1758 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1759 length, i = length+1, i+1
1762 elif s(buf, ch.data) == 'bimodindex':
1763 ch.chtype = chunk_type(CSLINE)
1764 ch.data = 'pindex'
1765 length, newi = getnextarg(length, buf, pp, i)
1766 ingroupch = pp[i:newi]
1767 del pp[i:newi]
1768 length = length - (newi-i)
1770 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1771 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1772 ingroupch.append(chunk(GROUP, ch.where, [\
1773 chunk(PLAIN, ch.where, \
1774 '(built-in)')]))
1776 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1777 length, i = length+1, i+1
1779 elif s(buf, ch.data) == 'sectcode':
1780 ch.data = 'code'
1783 elif s(buf, ch.data) == 'stmodindex':
1784 ch.chtype = chunk_type(CSLINE)
1785 # use the program index as module index
1786 ch.data = 'pindex'
1787 length, newi = getnextarg(length, buf, pp, i)
1788 ingroupch = pp[i:newi]
1789 del pp[i:newi]
1790 length = length - (newi-i)
1792 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1793 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1794 ingroupch.append(chunk(GROUP, ch.where, [\
1795 chunk(PLAIN, ch.where, \
1796 '(standard)')]))
1798 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1799 length, i = length+1, i+1
1802 elif s(buf, ch.data) == 'stindex':
1803 # XXX must actually go to newindex st
1804 wh = ch.where
1805 ch.chtype = chunk_type(CSLINE)
1806 ch.data = 'cindex'
1807 length, newi = getnextarg(length, buf, pp, i)
1808 ingroupch = [chunk(CSNAME, wh, 'code'), \
1809 chunk(GROUP, wh, pp[i:newi])]
1811 del pp[i:newi]
1812 length = length - (newi-i)
1814 t = ingroupch[:]
1815 t.append(chunk(PLAIN, wh, ' statement'))
1817 pp.insert(i, chunk(GROUP, wh, t))
1818 i, length = i+1, length+1
1820 pp.insert(i, chunk(CSLINE, wh, 'cindex'))
1821 i, length = i+1, length+1
1823 t = ingroupch[:]
1824 t.insert(0, chunk(PLAIN, wh, 'statement, '))
1826 pp.insert(i, chunk(GROUP, wh, t))
1827 i, length = i+1, length+1
1830 elif s(buf, ch.data) == 'indexii':
1831 #\indexii{A}{B} --->
1832 # @cindex A B
1833 # @cindex B, A
1834 length, newi = getnextarg(length, buf, pp, i)
1835 cp11 = pp[i:newi]
1836 cp21 = crcopy(pp[i:newi])
1837 del pp[i:newi]
1838 length = length - (newi-i)
1839 length, newi = getnextarg(length, buf, pp, i)
1840 cp12 = pp[i:newi]
1841 cp22 = crcopy(pp[i:newi])
1842 del pp[i:newi]
1843 length = length - (newi-i)
1845 ch.chtype = chunk_type(CSLINE)
1846 ch.data = 'cindex'
1847 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1848 chunk(PLAIN, ch.where, ' ')] + cp12))
1849 i, length = i+1, length+1
1850 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1851 chunk(GROUP, ch.where, cp22 + [\
1852 chunk(PLAIN, ch.where, ', ')]+ cp21)]
1853 i, length = i+2, length+2
1855 elif s(buf, ch.data) == 'indexiii':
1856 length, newi = getnextarg(length, buf, pp, i)
1857 cp11 = pp[i:newi]
1858 cp21 = crcopy(pp[i:newi])
1859 cp31 = crcopy(pp[i:newi])
1860 del pp[i:newi]
1861 length = length - (newi-i)
1862 length, newi = getnextarg(length, buf, pp, i)
1863 cp12 = pp[i:newi]
1864 cp22 = crcopy(pp[i:newi])
1865 cp32 = crcopy(pp[i:newi])
1866 del pp[i:newi]
1867 length = length - (newi-i)
1868 length, newi = getnextarg(length, buf, pp, i)
1869 cp13 = pp[i:newi]
1870 cp23 = crcopy(pp[i:newi])
1871 cp33 = crcopy(pp[i:newi])
1872 del pp[i:newi]
1873 length = length - (newi-i)
1875 ch.chtype = chunk_type(CSLINE)
1876 ch.data = 'cindex'
1877 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1878 chunk(PLAIN, ch.where, ' ')] + cp12 \
1879 + [chunk(PLAIN, ch.where, ' ')] \
1880 + cp13))
1881 i, length = i+1, length+1
1882 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1883 chunk(GROUP, ch.where, cp22 + [\
1884 chunk(PLAIN, ch.where, ' ')]+ cp23\
1885 + [chunk(PLAIN, ch.where, ', ')] +\
1886 cp21)]
1887 i, length = i+2, length+2
1888 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1889 chunk(GROUP, ch.where, cp33 + [\
1890 chunk(PLAIN, ch.where, ', ')]+ cp31\
1891 + [chunk(PLAIN, ch.where, ' ')] +\
1892 cp32)]
1893 i, length = i+2, length+2
1896 elif s(buf, ch.data) == 'indexiv':
1897 length, newi = getnextarg(length, buf, pp, i)
1898 cp11 = pp[i:newi]
1899 cp21 = crcopy(pp[i:newi])
1900 cp31 = crcopy(pp[i:newi])
1901 cp41 = crcopy(pp[i:newi])
1902 del pp[i:newi]
1903 length = length - (newi-i)
1904 length, newi = getnextarg(length, buf, pp, i)
1905 cp12 = pp[i:newi]
1906 cp22 = crcopy(pp[i:newi])
1907 cp32 = crcopy(pp[i:newi])
1908 cp42 = crcopy(pp[i:newi])
1909 del pp[i:newi]
1910 length = length - (newi-i)
1911 length, newi = getnextarg(length, buf, pp, i)
1912 cp13 = pp[i:newi]
1913 cp23 = crcopy(pp[i:newi])
1914 cp33 = crcopy(pp[i:newi])
1915 cp43 = crcopy(pp[i:newi])
1916 del pp[i:newi]
1917 length = length - (newi-i)
1918 length, newi = getnextarg(length, buf, pp, i)
1919 cp14 = pp[i:newi]
1920 cp24 = crcopy(pp[i:newi])
1921 cp34 = crcopy(pp[i:newi])
1922 cp44 = crcopy(pp[i:newi])
1923 del pp[i:newi]
1924 length = length - (newi-i)
1926 ch.chtype = chunk_type(CSLINE)
1927 ch.data = 'cindex'
1928 ingroupch = cp11 + \
1929 spacech + cp12 + \
1930 spacech + cp13 + \
1931 spacech + cp14
1932 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1933 i, length = i+1, length+1
1934 ingroupch = cp22 + \
1935 spacech + cp23 + \
1936 spacech + cp24 + \
1937 commach + cp21
1938 pp[i:i] = cindexch + [\
1939 chunk(GROUP, ch.where, ingroupch)]
1940 i, length = i+2, length+2
1941 ingroupch = cp33 + \
1942 spacech + cp34 + \
1943 commach + cp31 + \
1944 spacech + cp32
1945 pp[i:i] = cindexch + [\
1946 chunk(GROUP, ch.where, ingroupch)]
1947 i, length = i+2, length+2
1948 ingroupch = cp44 + \
1949 commach + cp41 + \
1950 spacech + cp42 + \
1951 spacech + cp43
1952 pp[i:i] = cindexch + [\
1953 chunk(GROUP, ch.where, ingroupch)]
1954 i, length = i+2, length+2
1958 else:
1959 print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
# Patterns used while writing the result: a newline, and any of the
# characters that get an `@' put in front of them in plain text.
re_newline = regex.compile('\n')
re_atsign = regex.compile('[@{}]')
1966 def dumpit(buf, wm, pp):
1968 global out
1970 i, length = 0, len(pp)
1972 addspace = 0
1974 while 1:
1975 if len(pp) != length:
1976 raise 'FATAL', 'inconsistent length'
1977 if i == length:
1978 break
1979 ch = pp[i]
1980 i = i + 1
1982 if addspace:
1983 dospace = 1
1984 addspace = 0
1985 else:
1986 dospace = 0
1988 if ch.chtype == chunk_type(CSNAME):
1989 wm('@' + s(buf, ch.data))
1990 if s(buf, ch.data) == 'node' and \
1991 pp[i].chtype == chunk_type(PLAIN) and \
1992 s(buf, pp[i].data) in out.doublenodes:
1993 ##XXX doesnt work yet??
1994 wm(' ZZZ-' + zfill(`i`, 4))
1995 if s(buf, ch.data)[0] in string.letters:
1996 addspace = 1
1997 elif ch.chtype == chunk_type(PLAIN):
1998 if dospace and s(buf, ch.data) not in (' ', '\t'):
1999 wm(' ')
2000 text = s(buf, ch.data)
2001 while 1:
2002 pos = re_atsign.search(text)
2003 if pos < 0:
2004 break
2005 wm(text[:pos] + '@' + text[pos])
2006 text = text[pos+1:]
2007 wm(text)
2008 elif ch.chtype == chunk_type(GROUP):
2009 wm('{')
2010 dumpit(buf, wm, ch.data)
2011 wm('}')
2012 elif ch.chtype == chunk_type(DENDLINE):
2013 wm('\n\n')
2014 while i != length and pp[i].chtype in \
2015 (chunk_type(DENDLINE), chunk_type(ENDLINE)):
2016 i = i + 1
2017 elif ch.chtype == chunk_type(OTHER):
2018 wm(s(buf, ch.data))
2019 elif ch.chtype == chunk_type(ACTIVE):
2020 wm(s(buf, ch.data))
2021 elif ch.chtype == chunk_type(ENDLINE):
2022 wm('\n')
2023 elif ch.chtype == chunk_type(CSLINE):
2024 if i >= 2 and pp[i-2].chtype not in \
2025 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2026 and (pp[i-2].chtype != chunk_type(PLAIN) \
2027 or s(buf, pp[i-2].data)[-1] != '\n'):
2029 wm('\n')
2030 wm('@' + s(buf, ch.data))
2031 if i == length:
2032 raise error, 'CSLINE expected another chunk'
2033 if pp[i].chtype != chunk_type(GROUP):
2034 raise error, 'CSLINE expected GROUP'
2035 if type(pp[i].data) != type([]):
2036 raise error, 'GROUP chould contain []-data'
2038 wobj = Wobj().init()
2039 dumpit(buf, wobj.write, pp[i].data)
2040 i = i + 1
2041 text = wobj.data
2042 del wobj
2043 if text:
2044 wm(' ')
2045 while 1:
2046 pos = re_newline.search(text)
2047 if pos < 0:
2048 break
2049 print 'WARNING: found newline in csline arg'
2050 wm(text[:pos] + ' ')
2051 text = text[pos+1:]
2052 wm(text)
2053 if i >= length or \
2054 pp[i].chtype not in (chunk_type(CSLINE), \
2055 chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2056 and (pp[i].chtype != chunk_type(PLAIN) \
2057 or s(buf, pp[i].data)[0] != '\n'):
2058 wm('\n')
2060 elif ch.chtype == chunk_type(COMMENT):
2061 ## print 'COMMENT: previous chunk =', pp[i-2]
2062 if pp[i-2].chtype == chunk_type(PLAIN):
2063 print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
2064 if s(buf, ch.data) and \
2065 regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
2066 if i >= 2 and pp[i-2].chtype not in \
2067 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2068 and not (pp[i-2].chtype == chunk_type(PLAIN) \
2069 and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
2070 print 'ADDING NEWLINE'
2071 wm('\n')
2072 wm('@c ' + s(buf, ch.data))
2073 elif ch.chtype == chunk_type(IGNORE):
2074 pass
2075 else:
2076 try:
2077 str = `s(buf, ch.data)`
2078 except TypeError:
2079 str = `ch.data`
2080 if len(str) > 400:
2081 str = str[:400] + '...'
2082 print 'warning:', ch.chtype, 'not handled, data ' + str
2086 def main():
2087 outfile = None
2088 headerfile = 'texipre.dat'
2089 trailerfile = 'texipost.dat'
2091 try:
2092 opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
2093 except getopt.error:
2094 args = []
2096 if not args:
2097 print 'usage: partparse [-o outfile] [-h headerfile]',
2098 print '[-t trailerfile] file ...'
2099 sys.exit(2)
2101 for opt, arg in opts:
2102 if opt == '-o': outfile = arg
2103 if opt == '-h': headerfile = arg
2104 if opt == '-t': trailerfile = arg
2106 if not outfile:
2107 root, ext = os.path.splitext(args[0])
2108 outfile = root + '.texi'
2110 if outfile in args:
2111 print 'will not overwrite input file', outfile
2112 sys.exit(2)
2114 outf = open(outfile, 'w')
2115 outf.write(open(headerfile, 'r').read())
2117 for file in args:
2118 if len(args) > 1: print '='*20, file, '='*20
2119 buf = open(file, 'r').read()
2120 w, pp = parseit(buf)
2121 startchange()
2122 changeit(buf, pp)
2123 dumpit(buf, outf.write, pp)
2125 outf.write(open(trailerfile, 'r').read())
2127 outf.close()
2129 main()