Doc/partparse.py

   1 #
   2 # partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
   3 #     and generate texinfo source.
   4 #
   5 # This is *not* a good example of good programming practices. In fact, this
#     file could use a complete rewrite, in order to become faster, more
#     easily extensible and maintainable.
   8 #
# However, I added some comments in a few places for the pitiful person who
#     would ever need to take a look into this file.
  11 #
  12 # Have I been clear enough??
  13 #
  14 # -jh
  15
  16
  17 import sys, string, regex, getopt, os
  18
  19 # Different parse modes for phase 1
  20 MODE_REGULAR = 0
  21 MODE_VERBATIM = 1
  22 MODE_CS_SCAN = 2
  23 MODE_COMMENT = 3
  24 MODE_MATH = 4
  25 MODE_DMATH = 5
  26 MODE_GOBBLEWHITE = 6
  27
  28 the_modes = MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, \
  29           MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE
  30
  31 # Show the neighbourhood of the scanned buffer
  32 def epsilon(buf, where):
  33         wmt, wpt = where - 10, where + 10
  34         if wmt < 0:
  35                 wmt = 0
  36         if wpt > len(buf):
  37                 wpt = len(buf)
  38         return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.'
  39
  40 # Should return the line number. never worked
  41 def lin():
  42         global lineno
  43         return ' Line ' + `lineno` + '.'
  44
  45 # Displays the recursion level.
  46 def lv(lvl):
  47         return ' Level ' + `lvl` + '.'
  48
  49 # Combine the three previous functions. Used often.
  50 def lle(lvl, buf, where):
  51         return lv(lvl) + lin() + epsilon(buf, where)
  52
  53
  54 # This class is only needed for _symbolic_ representation of the parse mode.
  55 class Mode:
  56         def init(self, arg):
  57                 if arg not in the_modes:
  58                         raise ValueError, 'mode not in the_modes'
  59                 self.mode = arg
  60                 return self
  61
  62         def __cmp__(self, other):
  63                 if type(self) != type(other):
  64                         other = mode(other)
  65                 return cmp(self.mode, other.mode)
  66
  67         def __repr__(self):
  68                 if self.mode == MODE_REGULAR:
  69                         return 'MODE_REGULAR'
  70                 elif self.mode == MODE_VERBATIM:
  71                         return 'MODE_VERBATIM'
  72                 elif self.mode == MODE_CS_SCAN:
  73                         return 'MODE_CS_SCAN'
  74                 elif self.mode == MODE_COMMENT:
  75                         return 'MODE_COMMENT'
  76                 elif self.mode == MODE_MATH:
  77                         return 'MODE_MATH'
  78                 elif self.mode == MODE_DMATH:
  79                         return 'MODE_DMATH'
  80                 elif self.mode == MODE_GOBBLEWHITE:
  81                         return 'MODE_GOBBLEWHITE'
  82                 else:
  83                         raise ValueError, 'mode not in the_modes'
  84
  85 # just a wrapper around a class initialisation
  86 def mode(arg):
  87         return Mode().init(arg)
  88
  89
  90 # After phase 1, the text consists of chunks, with a certain type
  91 # this type will be assigned to the chtype member of the chunk
  92 # the where-field contains the file position where this is found
# and the data field contains (1) a tuple describing the start and end
  94 # positions of the substring (can be used as slice for the buf-variable),
  95 # (2) just a string, mostly generated by the changeit routine,
  96 # or (3) a list, describing a (recursive) subgroup of chunks
  97 PLAIN = 0                       # ASSUME PLAINTEXT, data = the text
  98 GROUP = 1                       # GROUP ({}), data = [chunk, chunk,..]
  99 CSNAME = 2                      # CONTROL SEQ TOKEN, data = the command
 100 COMMENT = 3                     # data is the actual comment
 101 DMATH = 4                       # DISPLAYMATH, data = [chunk, chunk,..]
 102 MATH = 5                        # MATH, see DISPLAYMATH
 103 OTHER = 6                       # CHAR WITH CATCODE OTHER, data = char
 104 ACTIVE = 7                      # ACTIVE CHAR
 105 GOBBLEDWHITE = 8                # Gobbled LWSP, after CSNAME
 106 ENDLINE = 9                     # END-OF-LINE, data = '\n'
 107 DENDLINE = 10                   # DOUBLE EOL, data='\n', indicates \par
 108 ENV = 11                        # LaTeX-environment
 109                                         # data =(envname,[ch,ch,ch,.])
 110 CSLINE = 12                     # for texi: next chunk will be one group
 111                                         # of args. Will be set all on 1 line
 112 IGNORE = 13                     # IGNORE this data
 113 ENDENV = 14                     # TEMP END OF GROUP INDICATOR
 114 IF = 15                         # IF-directive
 115                                         # data = (flag,negate,[ch, ch, ch,...])
 116 the_types = PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, \
 117           GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF
 118
 119 # class, just to display symbolic name
 120 class ChunkType:
 121         def init(self, chunk_type):
 122                 if chunk_type not in the_types:
 123                         raise 'ValueError', 'chunk_type not in the_types'
 124                 self.chunk_type = chunk_type
 125                 return self
 126
 127         def __cmp__(self, other):
 128                 if type(self) != type(other):
 129                         other = chunk_type(other)
 130                 return cmp(self.chunk_type, other.chunk_type)
 131
 132         def __repr__(self):
 133                 if self.chunk_type == PLAIN:
 134                         return 'PLAIN'
 135                 elif self.chunk_type == GROUP:
 136                         return 'GROUP'
 137                 elif self.chunk_type == CSNAME:
 138                         return 'CSNAME'
 139                 elif self.chunk_type == COMMENT:
 140                         return 'COMMENT'
 141                 elif self.chunk_type == DMATH:
 142                         return 'DMATH'
 143                 elif self.chunk_type == MATH:
 144                         return 'MATH'
 145                 elif self.chunk_type == OTHER:
 146                         return 'OTHER'
 147                 elif self.chunk_type == ACTIVE:
 148                         return 'ACTIVE'
 149                 elif self.chunk_type == GOBBLEDWHITE:
 150                         return 'GOBBLEDWHITE'
 151                 elif self.chunk_type == DENDLINE:
 152                         return 'DENDLINE'
 153                 elif self.chunk_type == ENDLINE:
 154                         return 'ENDLINE'
 155                 elif self.chunk_type == ENV:
 156                         return 'ENV'
 157                 elif self.chunk_type == CSLINE:
 158                         return 'CSLINE'
 159                 elif self.chunk_type == IGNORE:
 160                         return 'IGNORE'
 161                 elif self.chunk_type == ENDENV:
 162                         return 'ENDENV'
 163                 elif self.chunk_type == IF:
 164                         return 'IF'
 165                 else:
 166                         raise ValueError, 'chunk_type not in the_types'
 167
 168 # ...and the wrapper
 169 def chunk_type(type):
 170         return ChunkType().init(type)
 171
# Type object of a ChunkType instance, computed once at import time.
# Chunk.init() compares against it to tell an already-wrapped chunk type
# from a bare type number.
chunk_type_type = type(chunk_type(0))
 174
 175 # this class contains a part of the parsed buffer
 176 class Chunk:
 177         def init(self, chtype, where, data):
 178                 if type(chtype) != chunk_type_type:
 179                         chtype = chunk_type(chtype)
 180                 self.chtype = chtype
 181                 if type(where) != type(0):
 182                         raise TypeError, '\'where\' is not a number'
 183                 self.where = where
 184                 self.data = data
 185                 ##print 'CHUNK', self
 186                 return self
 187
 188         def __repr__(self):
 189                 return 'chunk' + `self.chtype, self.where, self.data`
 190
 191 # and the wrapper
 192 def chunk(chtype, where, data):
 193          return Chunk().init(chtype, where, data)
 194
 195
 196
# Exception raised by the parser for malformed input.  This is a string
# exception, so it is raised as "raise error, message".
error = 'partparse.error'
 198
 199 #
 200 # TeX's catcodes...
 201 #
 202 CC_ESCAPE = 0
 203 CC_LBRACE = 1
 204 CC_RBRACE = 2
 205 CC_MATHSHIFT = 3
 206 CC_ALIGNMENT = 4
 207 CC_ENDLINE = 5
 208 CC_PARAMETER = 6
 209 CC_SUPERSCRIPT = 7
 210 CC_SUBSCRIPT = 8
 211 CC_IGNORE = 9
 212 CC_WHITE = 10
 213 CC_LETTER = 11
 214 CC_OTHER = 12
 215 CC_ACTIVE = 13
 216 CC_COMMENT = 14
 217 CC_INVALID = 15
 218
 219 # and the names
 220 cc_names = [\
 221           'CC_ESCAPE', \
 222           'CC_LBRACE', \
 223           'CC_RBRACE', \
 224           'CC_MATHSHIFT', \
 225           'CC_ALIGNMENT', \
 226           'CC_ENDLINE', \
 227           'CC_PARAMETER', \
 228           'CC_SUPERSCRIPT', \
 229           'CC_SUBSCRIPT', \
 230           'CC_IGNORE', \
 231           'CC_WHITE', \
 232           'CC_LETTER', \
 233           'CC_OTHER', \
 234           'CC_ACTIVE', \
 235           'CC_COMMENT', \
 236           'CC_INVALID', \
 237           ]
 238
 239 # Show a list of catcode-name-symbols
 240 def pcl(codelist):
 241         result = ''
 242         for i in codelist:
 243                 result = result + cc_names[i] + ', '
 244         return '[' + result[:-2] + ']'
 245
 246 # the name of the catcode (ACTIVE, OTHER, etc.)
 247 def pc(code):
 248         return cc_names[code]
 249
 250
 251 # Which catcodes make the parser stop parsing regular plaintext
 252 regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, \
 253           CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, \
 254           CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]
 255
 256 # same for scanning a control sequence name
 257 csname_scancodes = [CC_LETTER]
 258
 259 # same for gobbling LWSP
 260 white_scancodes = [CC_WHITE]
 261 ##white_scancodes = [CC_WHITE, CC_ENDLINE]
 262
 263 # make a list of all catcode id's, except for catcode ``other''
 264 all_but_other_codes = range(16)
 265 del all_but_other_codes[CC_OTHER]
 266 ##print all_but_other_codes
 267
 268 # when does a comment end
 269 comment_stopcodes = [CC_ENDLINE]
 270
 271 # gather all characters together, specified by a list of catcodes
 272 def code2string(cc, codelist):
 273         ##print 'code2string: codelist = ' + pcl(codelist),
 274         result = ''
 275         for category in codelist:
 276                 if cc[category]:
 277                         result = result + cc[category]
 278         ##print 'result = ' + `result`
 279         return result
 280
# automatically generate all characters of catcode other, being the
# complement set in the 8-bit range (256 characters)
 283 def make_other_codes(cc):
 284         otherchars = range(256)         # could be made 256, no problem
 285         for category in all_but_other_codes:
 286                 if cc[category]:
 287                         for c in cc[category]:
 288                                 otherchars[ord(c)] = None
 289         result = ''
 290         for i in otherchars:
 291                 if i != None:
 292                         result = result + chr(i)
 293         return result
 294
 295 # catcode dump (which characters have which catcodes).
 296 def dump_cc(name, cc):
 297         ##print '\t' + name
 298         ##print '=' * (8+len(name))
 299         if len(cc) != 16:
 300                 raise TypeError, 'cc not good cat class'
 301 ##      for i in range(16):
 302 ##              print pc(i) + '\t' + `cc[i]`
 303
 304
 305 # In the beginning,....
 306 epoch_cc = [None] * 16
 307 ##dump_cc('epoch_cc', epoch_cc)
 308
 309
 310 # INITEX
 311 initex_cc = epoch_cc[:]
 312 initex_cc[CC_ESCAPE] = '\\'
 313 initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
 314           '\n', '\0', ' '
 315 initex_cc[CC_LETTER] = string.uppercase + string.lowercase
 316 initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
 317 #initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
 318 ##dump_cc('initex_cc', initex_cc)
 319
 320
 321 # LPLAIN: LaTeX catcode setting (see lplain.tex)
 322 lplain_cc = initex_cc[:]
 323 lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
 324 lplain_cc[CC_MATHSHIFT] = '$'
 325 lplain_cc[CC_ALIGNMENT] = '&'
 326 lplain_cc[CC_PARAMETER] = '#'
 327 lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
 328 lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
 329 lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
 330 lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
 331 lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
 332 ##dump_cc('lplain_cc', lplain_cc)
 333
 334
 335 # Guido's LaTeX environment catcoded '_' as ``other''
 336 # my own purpose catlist
 337 my_cc = lplain_cc[:]
 338 my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
 339 my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'       # add it to OTHER list
 340 dump_cc('my_cc', my_cc)
 341
 342
 343
 344 # needed for un_re, my equivalent for regexp-quote in Emacs
 345 re_meaning = '\\[]^$'
 346
 347 def un_re(str):
 348         result = ''
 349         for i in str:
 350                 if i in re_meaning:
 351                         result = result + '\\'
 352                 result = result + i
 353         return result
 354
 355 # NOTE the negate ('^') operator in *some* of the regexps below
 356 def make_rc_regular(cc):
 357         # problems here if '[]' are included!!
 358         return regex.compile('[' + code2string(cc, regular_stopcodes) + ']')
 359
 360 def make_rc_cs_scan(cc):
 361         return regex.compile('[^' + code2string(cc, csname_scancodes) + ']')
 362
 363 def make_rc_comment(cc):
 364         return regex.compile('[' + code2string(cc, comment_stopcodes) + ']')
 365
 366 def make_rc_endwhite(cc):
 367         return regex.compile('[^' + code2string(cc, white_scancodes) + ']')
 368
 369
 370
# The four scanners used by parseit(), all compiled once from my_cc.

# regular: normal mode: finds the next character that ends plain text
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
# (finds the first character that is not a letter)
rc_cs_scan = make_rc_cs_scan(my_cc)
# comment: finds the end-of-line character that ends a comment
rc_comment = make_rc_comment(my_cc)
# endwhite: finds the first character that is not white space
rc_endwhite = make_rc_endwhite(my_cc)
 378
 379
 380 # parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
#     RECURSION-LEVEL is incremented on entry.
 382 #     result contains the list of chunks returned
 383 #     together with this list, the buffer position is returned
 384
#     RECURSION-LEVEL will be set to zero *again*, when recursively a
#     {,D}MATH-mode scan has been entered.
 387 #     This has been done in order to better check for environment-mismatches
 388
 389 def parseit(buf, *rest):
 390         global lineno
 391
 392         if len(rest) == 3:
 393                 parsemode, start, lvl = rest
 394         elif len(rest) == 2:
 395                 parsemode, start, lvl = rest + (0, )
 396         elif len(rest) == 1:
 397                 parsemode, start, lvl = rest + (0, 0)
 398         elif len(rest) == 0:
 399                 parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
 400         else:
 401                 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
 402         result = []
 403         end = len(buf)
 404         if lvl == 0 and parsemode == mode(MODE_REGULAR):
 405                 lineno = 1
 406         lvl = lvl + 1
 407
 408         ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
 409
 410         #
 411         # some of the more regular modes...
 412         #
 413
 414         if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
 415                 cstate = []
 416                 newpos = start
 417                 curpmode = parsemode
 418                 while 1:
 419                         where = newpos
 420                         #print '\tnew round: ' + epsilon(buf, where)
 421                         if where == end:
 422                                 if lvl > 1 or curpmode != mode(MODE_REGULAR):
 423                                         # not the way we started...
 424                                         raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
 425                                 # the real ending of lvl-1 parse
 426                                 return end, result
 427
 428                         pos = rc_regular.search(buf, where)
 429
 430                         if pos < 0:
 431                                 pos = end
 432
 433                         if pos != where:
 434                                 newpos, c = pos, chunk(PLAIN, where, (where, pos))
 435                                 result.append(c)
 436                                 continue
 437
 438
 439                         #
 440                         # ok, pos == where and pos != end
 441                         #
 442                         foundchar = buf[where]
 443                         if foundchar in my_cc[CC_LBRACE]:
 444                                 # recursive subgroup parse...
 445                                 newpos, data = parseit(buf, curpmode, where+1, lvl)
 446                                 result.append(chunk(GROUP, where, data))
 447
 448                         elif foundchar in my_cc[CC_RBRACE]:
 449                                 if lvl <= 1:
 450                                         raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
 451                                 if  lvl == 1 and mode != mode(MODE_REGULAR):
 452                                         raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
 453                                 return where + 1, result
 454
 455                         elif foundchar in my_cc[CC_ESCAPE]:
 456                                 #
 457                                 # call the routine that actually deals with
 458                                 #     this problem. If do_ret is None, than
 459                                 #     return the value of do_ret
 460                                 #
 461                                 # Note that handle_cs might call this routine
 462                                 #     recursively again...
 463                                 #
 464                                 do_ret, newpos = handlecs(buf, where, \
 465                                           curpmode, lvl, result, end)
 466                                 if do_ret != None:
 467                                         return do_ret
 468
 469                         elif foundchar in my_cc[CC_COMMENT]:
 470                                 newpos, data = parseit(buf, \
 471                                           mode(MODE_COMMENT), where+1, lvl)
 472                                 result.append(chunk(COMMENT, where, data))
 473
 474                         elif foundchar in my_cc[CC_MATHSHIFT]:
 475                                 # note that recursive calls to math-mode
 476                                 # scanning are called with recursion-level 0
 477                                 # again, in order to check for bad mathend
 478                                 #
 479                                 if where + 1 != end and \
 480                                           buf[where + 1] in \
 481                                           my_cc[CC_MATHSHIFT]:
 482                                         #
 483                                         # double mathshift, e.g. '$$'
 484                                         #
 485                                         if curpmode == mode(MODE_REGULAR):
 486                                                 newpos, data = parseit(buf, \
 487                                                           mode(MODE_DMATH), \
 488                                                           where+2, 0)
 489                                                 result.append(chunk(DMATH, \
 490                                                           where, data))
 491                                         elif curpmode == mode(MODE_MATH):
 492                                                 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
 493                                         elif lvl != 1:
 494                                                 raise error, 'bad mathend.' + \
 495                                                           lle(lvl, buf, where)
 496                                         else:
 497                                                 return where + 2, result
 498                                 else:
 499                                         #
 500                                         # single math shift, e.g. '$'
 501                                         #
 502                                         if curpmode == mode(MODE_REGULAR):
 503                                                 newpos, data = parseit(buf, \
 504                                                           mode(MODE_MATH), \
 505                                                           where+1, 0)
 506                                                 result.append(chunk(MATH, \
 507                                                           where, data))
 508                                         elif curpmode == mode(MODE_DMATH):
 509                                                 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
 510                                         elif lvl != 1:
 511                                                 raise error, 'bad mathend.' + \
 512                                                           lv(lvl, buf, where)
 513                                         else:
 514                                                 return where + 1, result
 515
 516                         elif foundchar in my_cc[CC_IGNORE]:
 517                                 print 'warning: ignored char', `foundchar`
 518                                 newpos = where + 1
 519
 520                         elif foundchar in my_cc[CC_ACTIVE]:
 521                                 result.append(chunk(ACTIVE, where, foundchar))
 522                                 newpos = where + 1
 523
 524                         elif foundchar in my_cc[CC_INVALID]:
 525                                 raise error, 'invalid char ' + `foundchar`
 526                                 newpos = where + 1
 527
 528                         elif foundchar in my_cc[CC_ENDLINE]:
 529                                 #
 530                                 # after an end of line, eat the rest of
 531                                 # whitespace on the beginning of the next line
 532                                 # this is what LaTeX more or less does
 533                                 #
 534                                 # also, try to indicate double newlines (\par)
 535                                 #
 536                                 lineno = lineno + 1
 537                                 savedwhere = where
 538                                 newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
 539                                 if newpos != end and buf[newpos] in \
 540                                           my_cc[CC_ENDLINE]:
 541                                         result.append(chunk(DENDLINE, \
 542                                                   savedwhere, foundchar))
 543                                 else:
 544                                         result.append(chunk(ENDLINE, \
 545                                                   savedwhere, foundchar))
 546                         else:
 547                                 result.append(chunk(OTHER, where, foundchar))
 548                                 newpos = where + 1
 549
 550         elif parsemode == mode(MODE_CS_SCAN):
 551                 #
 552                 # scan for a control sequence token. `\ape', `\nut' or `\%'
 553                 #
 554                 if start == end:
 555                         raise EOFError, 'can\'t find end of csname'
 556                 pos = rc_cs_scan.search(buf, start)
 557                 if pos < 0:
 558                         pos = end
 559                 if pos == start:
 560                         # first non-letter right where we started the search
 561                         # ---> the control sequence name consists of one single
 562                         # character. Also: don't eat white space...
 563                         if buf[pos] in my_cc[CC_ENDLINE]:
 564                                 lineno = lineno + 1
 565                         pos = pos + 1
 566                         return pos, (start, pos)
 567                 else:
 568                         spos = pos
 569                         if buf[pos] == '\n':
 570                                 lineno = lineno + 1
 571                                 spos = pos + 1
 572                         pos2, dummy = parseit(buf, \
 573                                   mode(MODE_GOBBLEWHITE), spos, lvl)
 574                         return pos2, (start, pos)
 575
 576         elif parsemode == mode(MODE_GOBBLEWHITE):
 577                 if start == end:
 578                         return start, ''
 579                 pos = rc_endwhite.search(buf, start)
 580                 if pos < 0:
 581                         pos = start
 582                 return pos, (start, pos)
 583
 584         elif parsemode == mode(MODE_COMMENT):
 585                 pos = rc_comment.search(buf, start)
 586                 lineno = lineno + 1
 587                 if pos < 0:
 588                         print 'no newline perhaps?'
 589                         raise EOFError, 'can\'t find end of comment'
 590                 pos = pos + 1
 591                 pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
 592                 return pos2, (start, pos)
 593
 594
 595         else:
 596                 raise error, 'Unknown mode (' + `parsemode` + ')'
 597
 598
 599 #moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
 600
 601 #boxcommands = 'mbox', 'fbox'
 602 #defcommands = 'def', 'newcommand'
 603
# The literal text that ends a verbatim environment, and a compiled pattern
# that matches it (un_re() quotes the backslash).  handlecs() searches for
# it to skip over the verbatim body without parsing it.
endverbstr = '\\end{verbatim}'

re_endverb = regex.compile(un_re(endverbstr))
 607
 608 #
 609 # handlecs: helper function for parseit, for the special thing we might
 610 #     wanna do after certain command control sequences
 611 # returns: None or return_data, newpos
 612 #
 613 # in the latter case, the calling function is instructed to immediately
 614 # return with the data in return_data
 615 #
 616 def handlecs(buf, where, curpmode, lvl, result, end):
 617         global lineno
 618
 619         # get the control sequence name...
 620         newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
 621         saveddata = data
 622
 623         if s(buf, data) in ('begin', 'end'):
 624                 # skip the expected '{' and get the LaTeX-envname '}'
 625                 newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
 626                 if len(data) != 1:
 627                         raise error, 'expected 1 chunk of data.' + \
 628                                   lle(lvl, buf, where)
 629
 630                 # yucky, we've got an environment
 631                 envname = s(buf, data[0].data)
 632                 ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
 633                 if s(buf, saveddata) == 'begin' and envname == 'verbatim':
 634                         # verbatim deserves special treatment
 635                         pos = re_endverb.search(buf, newpos)
 636                         if pos < 0:
 637                                 raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
 638                         result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
 639                         newpos = pos + len(endverbstr)
 640
 641                 elif s(buf, saveddata) == 'begin':
 642                         # start parsing recursively... If that parse returns
 643                         # from an '\end{...}', then should the last item of
 644                         # the returned data be a string containing the ended
 645                         # environment
 646                         newpos, data = parseit(buf, curpmode, newpos, lvl)
 647                         if not data or type(data[-1]) != type(''):
 648                                 raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
 649                         retenv = data[-1]
 650                         del data[-1]
 651                         if retenv != envname:
 652                                 #[`retenv`, `envname`]
 653                                 raise error, 'environments do not match.' + \
 654                                           lle(lvl, buf, where) + \
 655                                           epsilon(buf, newpos)
 656                         result.append(chunk(ENV, where, (retenv, data)))
 657                 else:
 658                         # 'end'... append the environment name, as just
 659                         # pointed out, and order parsit to return...
 660                         result.append(envname)
 661                         ##print 'POINT of return: ' + epsilon(buf, newpos)
 662                         # the tuple will be returned by parseit
 663                         return (newpos, result), newpos
 664
 665         # end of \begin ... \end handling
 666
 667         elif s(buf, data)[0:2] == 'if':
 668                 # another scary monster: the 'if' directive
 669                 flag = s(buf, data)[2:]
 670
 671                 # recursively call parseit, just like environment above..
 672                 # the last item of data should contain the if-termination
 673                 # e.g., 'else' of 'fi'
 674                 newpos, data = parseit(buf, curpmode, newpos, lvl)
 675                 if not data or data[-1] not in ('else', 'fi'):
 676                         raise error, 'wrong if... termination' + \
 677                                   lle(lvl, buf, where) + epsilon(buf, newpos)
 678
 679                 ifterm = data[-1]
 680                 del data[-1]
 681                 # 0 means dont_negate flag
 682                 result.append(chunk(IF, where, (flag, 0, data)))
 683                 if ifterm == 'else':
 684                         # do the whole thing again, there is only one way
 685                         # to end this one, by 'fi'
 686                         newpos, data = parseit(buf, curpmode, newpos, lvl)
 687                         if not data or data[-1] not in ('fi', ):
 688                                 raise error, 'wrong if...else... termination' \
 689                                           + lle(lvl, buf, where) \
 690                                           + epsilon(buf, newpos)
 691
 692                         ifterm = data[-1]
 693                         del data[-1]
 694                         result.append(chunk(IF, where, (flag, 1, data)))
 695                 #done implicitely: return None, newpos
 696
 697         elif s(buf, data) in ('else', 'fi'):
 698                 result.append(s(buf, data))
 699                 # order calling party to return tuple
 700                 return (newpos, result), newpos
 701
 702         # end of \if, \else, ... \fi handling
 703
 704         elif s(buf, saveddata) == 'verb':
 705                 x2 = saveddata[1]
 706                 result.append(chunk(CSNAME, where, data))
 707                 if x2 == end:
 708                         raise error, 'premature end of command.' + lle(lvl, buf, where)
 709                 delimchar = buf[x2]
 710                 ##print 'VERB: delimchar ' + `delimchar`
 711                 pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
 712                 if pos < 0:
 713                         raise error, 'end of \'verb\' argument (' + \
 714                                   `delimchar` + ') not found.' + \
 715                                   lle(lvl, buf, where)
 716                 result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
 717                 newpos = pos + 1
 718         else:
 719                 result.append(chunk(CSNAME, where, data))
 720         return None, newpos
 721
 722 # this is just a function to get the string value if the possible data-tuple
 723 def s(buf, data):
 724         if type(data) == type(''):
 725                 return data
 726         if len(data) != 2 or not (type(data[0]) == type(data[1]) == type(0)):
 727                 raise TypeError, 'expected tuple of 2 integers'
 728         x1, x2 = data
 729         return buf[x1:x2]
 730
 731
 732 ##length, data1, i = getnextarg(length, buf, pp, i + 1)
 733
 734 # make a deep-copy of some chunks
 735 def crcopy(r):
 736         result = []
 737         for x in r:
 738                 result.append(chunkcopy(x))
 739         return result
 740
 741
 742
 743 # copy a chunk, would better be a method of class Chunk...
 744 def chunkcopy(ch):
 745         if ch.chtype == chunk_type(GROUP):
 746                 listc = ch.data[:]
 747                 for i in range(len(listc)):
 748                         listc[i] = chunkcopy(listc[i])
 749                 return chunk(GROUP, ch.where, listc)
 750         else:
 751                 return chunk(ch.chtype, ch.where, ch.data)
 752
 753
 754 # get next argument for TeX-macro, flatten a group (insert between)
 755 # or return Command Sequence token, or give back one character
 756 def getnextarg(length, buf, pp, item):
 757
 758         ##wobj = Wobj().init()
 759         ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
 760         ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
 761
 762         while item < length and pp[item].chtype == chunk_type(ENDLINE):
 763                 del pp[item]
 764                 length = length - 1
 765         if item >= length:
 766                 raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
 767         if pp[item].chtype == chunk_type(GROUP):
 768                 newpp = pp[item].data
 769                 del pp[item]
 770                 length = length - 1
 771                 changeit(buf, newpp)
 772                 length = length + len(newpp)
 773                 pp[item:item] = newpp
 774                 item = item + len(newpp)
 775                 if len(newpp) < 10:
 776                         wobj = Wobj().init()
 777                         dumpit(buf, wobj.write, newpp)
 778                         ##print 'GETNEXTARG: inserted ' + `wobj.data`
 779                 return length, item
 780         elif pp[item].chtype == chunk_type(PLAIN):
 781                 #grab one char
 782                 print 'WARNING: grabbing one char'
 783                 if len(s(buf, pp[item].data)) > 1:
 784                         pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
 785                         item, length = item+1, length+1
 786                         pp[item].data = s(buf, pp[item].data)[1:]
 787                 else:
 788                         item = item+1
 789                 return length, item
 790         else:
 791                 try:
 792                         str = `s(buf, ch.data)`
 793                 except TypeError:
 794                         str = `ch.data`
 795                         if len(str) > 400:
 796                                 str = str[:400] + '...'
 797                 print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
 798                 return length, item
 799
 800
 801 # this one is needed to find the end of LaTeX's optional argument, like
 802 # item[...]
 803 re_endopt = regex.compile(']')
 804
 805 # get a LaTeX-optional argument, you know, the square braces '[' and ']'
 806 def getoptarg(length, buf, pp, item):
 807
 808         wobj = Wobj().init()
 809         dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
 810         ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
 811
 812         if item >= length or \
 813                   pp[item].chtype != chunk_type(PLAIN) or \
 814                   s(buf, pp[item].data)[0] != '[':
 815                 return length, item
 816
 817         pp[item].data = s(buf, pp[item].data)[1:]
 818         if len(pp[item].data) == 0:
 819                 del pp[item]
 820                 length = length-1
 821
 822         while 1:
 823                 if item == length:
 824                         raise error, 'No end of optional arg found'
 825                 if pp[item].chtype == chunk_type(PLAIN):
 826                         text = s(buf, pp[item].data)
 827                         pos = re_endopt.search(text)
 828                         if pos >= 0:
 829                                 pp[item].data = text[:pos]
 830                                 if pos == 0:
 831                                         del pp[item]
 832                                         length = length-1
 833                                 else:
 834                                         item=item+1
 835                                 text = text[pos+1:]
 836
 837                                 while text and text[0] in ' \t':
 838                                         text = text[1:]
 839
 840                                 if text:
 841                                         pp.insert(item, chunk(PLAIN, 0, text))
 842                                         length = length + 1
 843                                 return length, item
 844
 845                 item = item+1
 846
 847
 848 # Wobj just add write-requests to the ``data'' attribute
 849 class Wobj:
 850         def init(self):
 851                 self.data = ''
 852                 return self
 853         def write(self, data):
 854                 self.data = self.data + data
 855
 856 # ignore these commands
 857 ignoredcommands = ('bcode', 'ecode', 'optional')
 858 # map commands like these to themselves as plaintext
 859 wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF')
 860 # \{ --> {,  \} --> }, etc
 861 themselves = ('{', '}', '.', '@') + wordsselves
 862 # these ones also themselves (see argargs macro in myformat.sty)
 863 inargsselves = (',', '[', ']', '(', ')')
 864 # this is how *I* would show the difference between emph and strong
 865 #  code 1 means: fold to uppercase
 866 markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), \
 867           'strong': ('*', '*')}
 868
 869 # recognise patter {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
 870 fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}
 871
 872 # transparent for these commands
 873 for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp', \
 874           'r', 'i', 't')
 875
 876
 877 # try to remove macros and return flat text
 878 def flattext(buf, pp):
 879         pp = crcopy(pp)
 880         ##print '---> FLATTEXT ' + `pp`
 881         wobj = Wobj().init()
 882
 883         i, length = 0, len(pp)
 884         while 1:
 885                 if len(pp) != length:
 886                         raise 'FATAL', 'inconsistent length'
 887                 if i >= length:
 888                         break
 889                 ch = pp[i]
 890                 i = i+1
 891                 if ch.chtype == chunk_type(PLAIN):
 892                         pass
 893                 elif ch.chtype == chunk_type(CSNAME):
 894                         if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
 895                                 ch.chtype = chunk_type(PLAIN)
 896                         elif s(buf, ch.data) == 'e':
 897                                 ch.chtype = chunk_type(PLAIN)
 898                                 ch.data = '\\'
 899                         elif len(s(buf, ch.data)) == 1 \
 900                                   and s(buf, ch.data) in onlylatexspecial:
 901                                 ch.chtype = chunk_type(PLAIN)
 902                                 # if it is followed by an empty group,
 903                                 # remove that group, it was needed for
 904                                 # a true space
 905                                 if i < length \
 906                                           and pp[i].chtype==chunk_type(GROUP) \
 907                                           and len(pp[i].data) == 0:
 908                                         del pp[i]
 909                                         length = length-1
 910
 911                         elif s(buf, ch.data) in markcmds.keys():
 912                                 length, newi = getnextarg(length, buf, pp, i)
 913                                 str = flattext(buf, pp[i:newi])
 914                                 del pp[i:newi]
 915                                 length = length - (newi - i)
 916                                 ch.chtype = chunk_type(PLAIN)
 917                                 markcmd = s(buf, ch.data)
 918                                 x = markcmds[markcmd]
 919                                 if type(x) == type(()):
 920                                         pre, after = x
 921                                         str = pre+str+after
 922                                 elif x == 1:
 923                                         str = string.upper(str)
 924                                 else:
 925                                         raise 'FATAL', 'corrupt markcmds'
 926                                 ch.data = str
 927                         else:
 928                                 if s(buf, ch.data) not in ignoredcommands:
 929                                         print 'WARNING: deleting command ' + `s(buf, ch.data)`
 930                                         print 'PP' + `pp[i-1]`
 931                                 del pp[i-1]
 932                                 i, length = i-1, length-1
 933                 elif ch.chtype == chunk_type(GROUP):
 934                         length, newi = getnextarg(length, buf, pp, i-1)
 935                         i = i-1
 936 ##                      str = flattext(buf, crcopy(pp[i-1:newi]))
 937 ##                      del pp[i:newi]
 938 ##                      length = length - (newi - i)
 939 ##                      ch.chtype = chunk_type(PLAIN)
 940 ##                      ch.data = str
 941                 else:
 942                         pass
 943
 944         dumpit(buf, wobj.write, pp)
 945         ##print 'FLATTEXT: RETURNING ' + `wobj.data`
 946         return wobj.data
 947
 948 # try to generate node names (a bit shorter than the chapter title)
 949 # note that the \nodename command (see elsewhere) overules these efforts
 950 def invent_node_names(text):
 951         words = string.split(text)
 952
 953         ##print 'WORDS ' + `words`
 954
 955         if len(words) == 2 \
 956                   and string.lower(words[0]) == 'built-in' \
 957                   and string.lower(words[1]) not in ('modules', 'functions'):
 958                 return words[1]
 959         if len(words) == 3 and string.lower(words[1]) == 'module':
 960                 return words[2]
 961         if len(words) == 3 and string.lower(words[1]) == 'object':
 962                 return string.join(words[0:2])
 963         if len(words) > 4 and string.lower(string.join(words[-4:])) == \
 964                   'methods and data attributes':
 965                 return string.join(words[:2])
 966         return text
 967
 968 re_commas_etc = regex.compile('[,`\'@{}]')
 969
 970 re_whitespace = regex.compile('[ \t]*')
 971
 972
 973 ##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
 974
 975 # look if the next non-white stuff is also a command, resulting in skipping
 976 # double endlines (DENDLINE) too, and thus omitting \par's
 977 # Sometimes this is too much, maybe consider DENDLINE's as stop
 978 def next_command_p(length, buf, pp, i, cmdname):
 979
 980         while 1:
 981                 if i >= len(pp):
 982                         break
 983                 ch = pp[i]
 984                 i = i+1
 985                 if ch.chtype == chunk_type(ENDLINE):
 986                         continue
 987                 if ch.chtype == chunk_type(DENDLINE):
 988                         continue
 989                 if ch.chtype == chunk_type(PLAIN):
 990                         if re_whitespace.search(s(buf, ch.data)) == 0 and \
 991                                   re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)):
 992                                 continue
 993                         return -1
 994                 if ch.chtype == chunk_type(CSNAME):
 995                         if s(buf, ch.data) == cmdname:
 996                                 return i # _after_ the command
 997                         return -1
 998                 return -1
 999
1000
# the characters that are special to LaTeX, but not to texi..
onlylatexspecial = "_~^$#&%"

# an empty class; instances are only used to hang attributes on
class Struct:
        pass

# both get their attributes in startchange()
hist = Struct()
out = Struct()
1008
# Bring the module-level hist and out objects into their starting state.
# Only attributes are assigned, the objects themselves stay the same.
def startchange():
        # counters and flags
        hist.inargs = 0
        hist.enumeratenesting = 0
        hist.itemizenesting = 0

        # stack of environment names, and what has been seen so far
        hist.inenv = []
        hist.nodenames = []
        hist.cindex = []

        # duplicates found along the way
        out.doublenodes = []
        out.doublecindeces = []
1020
1021
# ready-made one-chunk lists: a blank, a comma plus blank, and a cindex
# command line
spacech = [chunk(PLAIN, 0, " ")]
commach = [chunk(PLAIN, 0, ", ")]
cindexch = [chunk(CSLINE, 0, "cindex")]

# the standard variation in symbols for itemize; changeit() picks one by
# nesting depth
itemizesymbols = ["bullet", "minus", "dots"]

# same for enumerate
enumeratesymbols = ["1", "A", "a"]
1031
1032 ##
1033 ## \begin{ {func,data,exc}desc }{name}...
1034 ##   the resulting texi-code is dependent on the contents of indexsubitem
1035 ##
1036
# indexsubitem: `['XXX', 'function']`
# funcdesc:
#     deffn {`idxsi`} NAME (FUNCARGS)

# indexsubitem: `['XXX', 'method']`
# funcdesc:
#     defmethod {`idxsi[0]`} NAME (FUNCARGS)

# indexsubitem: `['in', 'module', 'MODNAME']`
# datadesc:
#     defcv data {`idxsi[1:]`} NAME
# excdesc:
#     defcv exception {`idxsi[1:]`} NAME
# funcdesc:
#     deffn {function of `idxsi[1:]`} NAME (FUNCARGS)

# indexsubitem: `['OBJECT', 'attribute']`
# datadesc:
#     defcv attribute {`OBJECT`} NAME
1056
1057
1058 ## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1059 ##   or \funcline{NAME}{ARGS}
1060 ##
1061 def do_funcdesc(length, buf, pp, i):
1062         startpoint = i-1
1063         ch = pp[startpoint]
1064         wh = ch.where
1065         length, newi = getnextarg(length, buf, pp, i)
1066         funcname = chunk(GROUP, wh, pp[i:newi])
1067         del pp[i:newi]
1068         length = length - (newi-i)
1069         save = hist.inargs
1070         hist.inargs = 1
1071         length, newi = getnextarg(length, buf, pp, i)
1072         hist.inargs = save
1073         del save
1074         the_args = [chunk(PLAIN, wh, '()'[0])] + \
1075                   pp[i:newi] + \
1076                   [chunk(PLAIN, wh, '()'[1])]
1077         del pp[i:newi]
1078         length = length - (newi-i)
1079
1080         idxsi = hist.indexsubitem       # words
1081         command = ''
1082         cat_class = ''
1083         if idxsi and idxsi[-1] in ('method', 'attribute'):
1084                 command = 'defmethod'
1085                 cat_class = string.join(idxsi[:-1])
1086         elif len(idxsi) == 2 and idxsi[1] == 'function':
1087                 command = 'deffn'
1088                 cat_class = string.join(idxsi)
1089         elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1090                 command = 'deffn'
1091                 cat_class = 'function of ' + string.join(idxsi[1:])
1092
1093         if not command:
1094                 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1095
1096         ch.chtype = chunk_type(CSLINE)
1097         ch.data = command
1098
1099         cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1100         cslinearg.append(chunk(PLAIN, wh, ' '))
1101         cslinearg.append(funcname)
1102         cslinearg.append(chunk(PLAIN, wh, ' '))
1103         l = len(cslinearg)
1104         cslinearg[l:l] = the_args
1105
1106         pp.insert(i, chunk(GROUP, wh, cslinearg))
1107         i, length = i+1, length+1
1108         hist.command = command
1109         return length, i
1110
1111
1112 ## this routine will be called on \begin{excdesc}{NAME}
1113 ## or \excline{NAME}
1114 ##
1115 def do_excdesc(length, buf, pp, i):
1116         startpoint = i-1
1117         ch = pp[startpoint]
1118         wh = ch.where
1119         length, newi = getnextarg(length, buf, pp, i)
1120         excname = chunk(GROUP, wh, pp[i:newi])
1121         del pp[i:newi]
1122         length = length - (newi-i)
1123
1124         idxsi = hist.indexsubitem       # words
1125         command = ''
1126         cat_class = ''
1127         class_class = ''
1128         if len(idxsi) == 2 and idxsi[1] == 'exception':
1129                 command = 'defvr'
1130                 cat_class = string.join(idxsi)
1131         elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1132                 command = 'defcv'
1133                 cat_class = 'exception'
1134                 class_class = string.join(idxsi[1:])
1135         elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
1136                 command = 'defcv'
1137                 cat_class = 'exception'
1138                 class_class = string.join(idxsi[2:])
1139
1140
1141         if not command:
1142                 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1143
1144         ch.chtype = chunk_type(CSLINE)
1145         ch.data = command
1146
1147         cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1148         cslinearg.append(chunk(PLAIN, wh, ' '))
1149         if class_class:
1150                 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1151                 cslinearg.append(chunk(PLAIN, wh, ' '))
1152         cslinearg.append(excname)
1153
1154         pp.insert(i, chunk(GROUP, wh, cslinearg))
1155         i, length = i+1, length+1
1156         hist.command = command
1157         return length, i
1158
1159 ## same for datadesc or dataline...
1160 def do_datadesc(length, buf, pp, i):
1161         startpoint = i-1
1162         ch = pp[startpoint]
1163         wh = ch.where
1164         length, newi = getnextarg(length, buf, pp, i)
1165         dataname = chunk(GROUP, wh, pp[i:newi])
1166         del pp[i:newi]
1167         length = length - (newi-i)
1168
1169         idxsi = hist.indexsubitem       # words
1170         command = ''
1171         cat_class = ''
1172         class_class = ''
1173         if idxsi[-1] == 'attribute':
1174                 command = 'defcv'
1175                 cat_class = 'attribute'
1176                 class_class = string.join(idxsi[:-1])
1177         elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1178                 command = 'defcv'
1179                 cat_class = 'data'
1180                 class_class = string.join(idxsi[1:])
1181         elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
1182                 command = 'defcv'
1183                 cat_class = 'data'
1184                 class_class = string.join(idxsi[2:])
1185
1186
1187         if not command:
1188                 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1189
1190         ch.chtype = chunk_type(CSLINE)
1191         ch.data = command
1192
1193         cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1194         cslinearg.append(chunk(PLAIN, wh, ' '))
1195         if class_class:
1196                 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1197                 cslinearg.append(chunk(PLAIN, wh, ' '))
1198         cslinearg.append(dataname)
1199
1200         pp.insert(i, chunk(GROUP, wh, cslinearg))
1201         i, length = i+1, length+1
1202         hist.command = command
1203         return length, i
1204
1205
# the regular indices, i.e., the ones that are not set in tt font by
# default
regindices = ("cindex",)
1208
1209 # remove illegal characters from node names
1210 def rm_commas_etc(text):
1211         result = ''
1212         changed = 0
1213         while 1:
1214                 pos = re_commas_etc.search(text)
1215                 if pos >= 0:
1216                         changed = 1
1217                         result = result + text[:pos]
1218                         text = text[pos+1:]
1219                 else:
1220                         result = result + text
1221                         break
1222         if changed:
1223                 print 'Warning: nodename changhed to ' + `result`
1224
1225         return result
1226
# the boolean flags that \if... constructs are tested against in
# changeit(); a flag that is not in here is an error there
flags = {"texi": 1}
1229
1230
1231 ##
1232 ## changeit: the actual routine, that changes the contents of the parsed
1233 ##           chunks
1234 ##
1235
1236 def changeit(buf, pp):
1237         global onlylatexspecial, hist, out
1238
1239         i, length = 0, len(pp)
1240         while 1:
1241                 # sanity check: length should always equal len(pp)
1242                 if len(pp) != length:
1243                         raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
1244                 if i >= length:
1245                         break
1246                 ch = pp[i]
1247                 i = i + 1
1248
1249                 if type(ch) == type(''):
1250                         #normally, only chunks are present in pp,
1251                         # but in some cases, some extra info
1252                         # has been inserted, e.g., the \end{...} clauses
1253                         raise 'FATAL', 'got string, probably too many ' + `end`
1254
1255                 if ch.chtype == chunk_type(GROUP):
1256                         # check for {\em ...} constructs
1257                         if ch.data and \
1258                                   ch.data[0].chtype == chunk_type(CSNAME) and \
1259                                   s(buf, ch.data[0].data) in fontchanges.keys():
1260                                 k = s(buf, ch.data[0].data)
1261                                 del ch.data[0]
1262                                 pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
1263                                 length, i = length+1, i+1
1264
1265                         # recursively parse the contents of the group
1266                         changeit(buf, ch.data)
1267
1268                 elif ch.chtype == chunk_type(IF):
1269                         # \if...
1270                         flag, negate, data = ch.data
1271                         ##print 'IF: flag, negate = ' + `flag, negate`
1272                         if flag not in flags.keys():
1273                                 raise error, 'unknown flag ' + `flag`
1274
1275                         value = flags[flag]
1276                         if negate:
1277                                 value = (not value)
1278                         del pp[i-1]
1279                         length, i = length-1, i-1
1280                         if value:
1281                                 pp[i:i] = data
1282                                 length = length + len(data)
1283
1284
1285                 elif ch.chtype == chunk_type(ENV):
1286                         # \begin{...} ....
1287                         envname, data = ch.data
1288
1289                         #push this environment name on stack
1290                         hist.inenv.insert(0, envname)
1291
1292                         #append an endenv chunk after grouped data
1293                         data.append(chunk(ENDENV, ch.where, envname))
1294                         ##[`data`]
1295
1296                         #delete this object
1297                         del pp[i-1]
1298                         i, length = i-1, length-1
1299
1300                         #insert found data
1301                         pp[i:i] = data
1302                         length = length + len(data)
1303
1304                         if envname == 'verbatim':
1305                                 pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \
1306                                           chunk(GROUP, ch.where, [])]
1307                                 length, i = length+2, i+2
1308
1309                         elif envname == 'itemize':
1310                                 if hist.itemizenesting > len(itemizesymbols):
1311                                         raise error, 'too deep itemize nesting'
1312                                 ingroupch = [chunk(CSNAME, ch.where,\
1313                                           itemizesymbols[hist.itemizenesting])]
1314                                 hist.itemizenesting = hist.itemizenesting + 1
1315                                 pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\
1316                                           chunk(GROUP, ch.where, ingroupch)]
1317                                 length, i = length+2, i+2
1318
1319                         elif envname == 'enumerate':
1320                                 if hist.enumeratenesting > len(enumeratesymbols):
1321                                         raise error, 'too deep enumerate nesting'
1322                                 ingroupch = [chunk(PLAIN, ch.where,\
1323                                           enumeratesymbols[hist.enumeratenesting])]
1324                                 hist.enumeratenesting = hist.enumeratenesting + 1
1325                                 pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\
1326                                           chunk(GROUP, ch.where, ingroupch)]
1327                                 length, i = length+2, i+2
1328
1329                         elif envname == 'description':
1330                                 ingroupch = [chunk(CSNAME, ch.where, 'b')]
1331                                 pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \
1332                                           chunk(GROUP, ch.where, ingroupch)]
1333                                 length, i = length+2, i+2
1334
1335                         elif envname == 'tableiii':
1336                                 wh = ch.where
1337                                 newcode = []
1338
1339                                 #delete tabular format description
1340                                 # e.g., {|l|c|l|}
1341                                 length, newi = getnextarg(length, buf, pp, i)
1342                                 del pp[i:newi]
1343                                 length = length - (newi-i)
1344
1345                                 newcode.append(chunk(CSLINE, wh, 'table'))
1346                                 ingroupch = [chunk(CSNAME, wh, 'asis')]
1347                                 newcode.append(chunk(GROUP, wh, ingroupch))
1348                                 newcode.append(chunk(CSLINE, wh, 'item'))
1349
1350                                 #get the name of macro for @item
1351                                 # e.g., {code}
1352                                 length, newi = getnextarg(length, buf, pp, i)
1353
1354                                 if newi-i != 1:
1355                                         raise error, 'Sorry, expected 1 chunk argument'
1356                                 if pp[i].chtype != chunk_type(PLAIN):
1357                                         raise error, 'Sorry, expected plain text argument'
1358                                 hist.itemargmacro = s(buf, pp[i].data)
1359                                 del pp[i:newi]
1360                                 length = length - (newi-i)
1361
1362                                 for count in range(3):
1363                                         length, newi = getnextarg(length, buf, pp, i)
1364                                         emphgroup = [\
1365                                                   chunk(CSNAME, wh, 'emph'), \
1366                                                   chunk(GROUP, 0, pp[i:newi])]
1367                                         del pp[i:newi]
1368                                         length = length - (newi-i)
1369                                         if count == 0:
1370                                                 itemarg = emphgroup
1371                                         elif count == 2:
1372                                                 itembody = itembody + \
1373                                                           [chunk(PLAIN, wh, '  ---  ')] + \
1374                                                           emphgroup
1375                                         else:
1376                                                 itembody = emphgroup
1377                                 newcode.append(chunk(GROUP, wh, itemarg))
1378                                 newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
1379                                 pp[i:i] = newcode
1380                                 l = len(newcode)
1381                                 length, i = length+l, i+l
1382                                 del newcode, l
1383
1384                                 if length != len(pp):
1385                                         raise 'STILL, SOMETHING wrong', `i`
1386
1387
1388                         elif envname == 'funcdesc':
1389                                 pp.insert(i, chunk(PLAIN, ch.where, ''))
1390                                 i, length = i+1, length+1
1391                                 length, i = do_funcdesc(length, buf, pp, i)
1392
1393                         elif envname == 'excdesc':
1394                                 pp.insert(i, chunk(PLAIN, ch.where, ''))
1395                                 i, length = i+1, length+1
1396                                 length, i = do_excdesc(length, buf, pp, i)
1397
1398                         elif envname == 'datadesc':
1399                                 pp.insert(i, chunk(PLAIN, ch.where, ''))
1400                                 i, length = i+1, length+1
1401                                 length, i = do_datadesc(length, buf, pp, i)
1402
1403                         else:
1404                                 print 'WARNING: don\'t know what to do with env ' + `envname`
1405
1406                 elif ch.chtype == chunk_type(ENDENV):
1407                         envname = ch.data
1408                         if envname != hist.inenv[0]:
1409                                 raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
1410                         del hist.inenv[0]
1411                         del pp[i-1]
1412                         i, length = i-1, length-1
1413
1414                         if envname == 'verbatim':
1415                                 pp[i:i] = [\
1416                                           chunk(CSLINE, ch.where, 'end'), \
1417                                           chunk(GROUP, ch.where, [\
1418                                           chunk(PLAIN, ch.where, 'example')])]
1419                                 i, length = i+2, length+2
1420                         elif envname == 'itemize':
1421                                 hist.itemizenesting = hist.itemizenesting - 1
1422                                 pp[i:i] = [\
1423                                           chunk(CSLINE, ch.where, 'end'), \
1424                                           chunk(GROUP, ch.where, [\
1425                                           chunk(PLAIN, ch.where, 'itemize')])]
1426                                 i, length = i+2, length+2
1427                         elif envname == 'enumerate':
1428                                 hist.enumeratenesting = hist.enumeratenesting-1
1429                                 pp[i:i] = [\
1430                                           chunk(CSLINE, ch.where, 'end'), \
1431                                           chunk(GROUP, ch.where, [\
1432                                           chunk(PLAIN, ch.where, 'enumerate')])]
1433                                 i, length = i+2, length+2
1434                         elif envname == 'description':
1435                                 pp[i:i] = [\
1436                                           chunk(CSLINE, ch.where, 'end'), \
1437                                           chunk(GROUP, ch.where, [\
1438                                           chunk(PLAIN, ch.where, 'table')])]
1439                                 i, length = i+2, length+2
1440                         elif envname == 'tableiii':
1441                                 pp[i:i] = [\
1442                                           chunk(CSLINE, ch.where, 'end'), \
1443                                           chunk(GROUP, ch.where, [\
1444                                           chunk(PLAIN, ch.where, 'table')])]
1445                                 i, length = i+2, length + 2
1446                                 pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
1447                                 i, length = i+1, length+1
1448
1449                         elif envname in ('funcdesc', 'excdesc', 'datadesc'):
1450                                 pp[i:i] = [\
1451                                           chunk(CSLINE, ch.where, 'end'), \
1452                                           chunk(GROUP, ch.where, [\
1453                                           chunk(PLAIN, ch.where, hist.command)])]
1454                                 i, length = i+2, length+2
1455                         else:
1456                                 print 'WARNING: ending env ' + `envname` + 'has no actions'
1457
1458                 elif ch.chtype == chunk_type(CSNAME):
1459                         # control name transformations
1460                         if s(buf, ch.data) in ignoredcommands:
1461                                 del pp[i-1]
1462                                 i, length = i-1, length-1
1463                         elif s(buf, ch.data) == '@' and \
1464                                   i != length and \
1465                                   pp[i].chtype == chunk_type(PLAIN) and \
1466                                   s(buf, pp[i].data)[0] == '.':
1467                                 # \@. --> \. --> @.
1468                                 ch.data = '.'
1469                                 del pp[i]
1470                                 length = length-1
1471                         elif s(buf, ch.data) == '\\':
1472                                 # \\ --> \* --> @*
1473                                 ch.data = '*'
1474                         elif len(s(buf, ch.data)) == 1 and \
1475                                   s(buf, ch.data) in onlylatexspecial:
1476                                 ch.chtype = chunk_type(PLAIN)
1477                                 # check if such a command is followed by
1478                                 # an empty group: e.g., `\%{}'.  If so, remove
1479                                 # this empty group too
1480                                 if i < length and \
1481                                           pp[i].chtype == chunk_type(GROUP) \
1482                                           and len(pp[i].data) == 0:
1483                                         del pp[i]
1484                                         length = length-1
1485
1486                         elif hist.inargs and s(buf, ch.data) in inargsselves:
1487                                 # This is the special processing of the
1488                                 # arguments of the \begin{funcdesc}... or
1489                                 # \funcline... arguments
1490                                 # \, --> , \[ --> [, \] --> ]
1491                                 ch.chtype = chunk_type(PLAIN)
1492
1493                         elif s(buf, ch.data) == 'renewcommand':
1494                                 # \renewcommand{\indexsubitem}....
1495                                 i, length = i-1, length-1
1496                                 del pp[i]
1497                                 length, newi = getnextarg(length, buf, pp, i)
1498                                 if newi-i == 1 \
1499                                           and i < length \
1500                                           and pp[i].chtype == chunk_type(CSNAME) \
1501                                           and s(buf, pp[i].data) == 'indexsubitem':
1502                                         del pp[i:newi]
1503                                         length = length - (newi-i)
1504                                         length, newi = getnextarg(length, buf, pp, i)
1505                                         text = flattext(buf, pp[i:newi])
1506                                         if text[0] != '(' or text[-1] != ')':
1507                                                 raise error, 'expected indexsubitme enclosed in braces'
1508                                         words = string.split(text[1:-1])
1509                                         hist.indexsubitem = words
1510                                         del text, words
1511                                 else:
1512                                         print 'WARNING: renewcommand with unsupported arg removed'
1513                                 del pp[i:newi]
1514                                 length = length - (newi-i)
1515
1516                         elif s(buf, ch.data) == 'item':
1517                                 ch.chtype = chunk_type(CSLINE)
1518                                 length, newi = getoptarg(length, buf, pp, i)
1519                                 ingroupch = pp[i:newi]
1520                                 del pp[i:newi]
1521                                 length = length - (newi-i)
1522                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1523                                 i, length = i+1, length+1
1524
1525                         elif s(buf, ch.data) == 'ttindex':
1526                                 idxsi = hist.indexsubitem
1527
1528                                 cat_class = ''
1529                                 if len(idxsi) >= 2 and idxsi[1] in \
1530                                           ('method', 'function'):
1531                                         command = 'findex'
1532                                 elif len(idxsi) >= 2 and idxsi[1] in \
1533                                           ('exception', 'object'):
1534                                         command = 'vindex'
1535                                 else:
1536                                         print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
1537                                         command = 'cindex'
1538
1539                                 if not cat_class:
1540                                         cat_class = '('+string.join(idxsi)+')'
1541
1542                                 ch.chtype = chunk_type(CSLINE)
1543                                 ch.data = command
1544
1545                                 length, newi = getnextarg(length, buf, pp, i)
1546                                 arg = pp[i:newi]
1547                                 del pp[i:newi]
1548                                 length = length - (newi-i)
1549
1550                                 cat_arg = [chunk(PLAIN, ch.where, cat_class)]
1551
1552                                 # determine what should be set in roman, and
1553                                 # what in tt-font
1554                                 if command in regindices:
1555
1556                                         arg = [chunk(CSNAME, ch.where, 't'), \
1557                                                   chunk(GROUP, ch.where, arg)]
1558                                 else:
1559                                         cat_arg = [chunk(CSNAME, ch.where, 'r'), \
1560                                                   chunk(GROUP, ch.where, cat_arg)]
1561
1562                                 ingroupch = arg + \
1563                                           [chunk(PLAIN, ch.where, ' ')] + \
1564                                           cat_arg
1565
1566                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1567                                 length, i = length+1, i+1
1568
1569
1570                         elif s(buf, ch.data) == 'ldots':
1571                                 # \ldots --> \dots{} --> @dots{}
1572                                 ch.data = 'dots'
1573                                 if i == length \
1574                                           or pp[i].chtype != chunk_type(GROUP) \
1575                                           or pp[i].data != []:
1576                                         pp.insert(i, chunk(GROUP, ch.where, []))
1577                                         i, length = i+1, length+1
1578                         elif s(buf, ch.data) in wordsselves:
1579                                 # \UNIX --> UNIX
1580                                 ch.chtype = chunk_type(PLAIN)
1581                                 if i != length \
1582                                           and pp[i].chtype == chunk_type(GROUP) \
1583                                           and pp[i].data == []:
1584                                         del pp[i]
1585                                         length = length-1
1586                         elif s(buf, ch.data) in for_texi:
1587                                 pass
1588
1589                         elif s(buf, ch.data) == 'e':
1590                                 # \e --> \
1591                                 ch.data = '\\'
1592                                 ch.chtype = chunk_type(PLAIN)
1593                         elif s(buf, ch.data) == 'lineiii':
1594                                 # This is the most tricky one
1595                                 # \lineiii{a1}{a2}[{a3}] -->
1596                                 # @item @<cts. of itemargmacro>{a1}
1597                                 #  a2 [ -- a3]
1598                                 #
1599                                 ##print 'LINEIIIIII!!!!!!!'
1600 ##                              wobj = Wobj().init()
1601 ##                              dumpit(buf, wobj.write, pp[i-1:i+5])
1602 ##                              print '--->' + wobj.data + '<----'
1603                                 if not hist.inenv:
1604                                         raise error, \
1605                                                   'no environment for lineiii'
1606                                 if hist.inenv[0] != 'tableiii':
1607                                         raise error, \
1608                                                   'wrong command (' + \
1609                                                   `'lineiii'` + \
1610                                                   ') in wrong environment (' \
1611                                                   + `hist.inenv[0]` + ')'
1612                                 ch.chtype = chunk_type(CSLINE)
1613                                 ch.data = 'item'
1614                                 length, newi = getnextarg(length, buf, pp, i)
1615                                 ingroupch = [chunk(CSNAME, 0, \
1616                                           hist.itemargmacro), \
1617                                           chunk(GROUP, 0, pp[i:newi])]
1618                                 del pp[i:newi]
1619                                 length = length - (newi-i)
1620 ##                              print 'ITEM ARG: --->',
1621 ##                              wobj = Wobj().init()
1622 ##                              dumpit(buf, wobj.write, ingroupch)
1623 ##                              print wobj.data, '<---'
1624                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1625                                 grouppos = i
1626                                 i, length = i+1, length+1
1627                                 length, i = getnextarg(length, buf, pp, i)
1628                                 length, newi = getnextarg(length, buf, pp, i)
1629                                 if newi > i:
1630                                         # we have a 3rd arg
1631                                         pp.insert(i, chunk(PLAIN, ch.where, '  ---  '))
1632                                         i = newi + 1
1633                                         length = length + 1
1634 ##                                      pp[grouppos].data = pp[grouppos].data \
1635 ##                                                + [chunk(PLAIN, ch.where, '  ')] \
1636 ##                                                + pp[i:newi]
1637 ##                                      del pp[i:newi]
1638 ##                                      length = length - (newi-i)
1639                                 if length != len(pp):
1640                                         raise 'IN LINEIII IS THE ERR', `i`
1641
1642                         elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
1643                                 #\xxxsection{A} ---->
1644                                 # @node A, , ,
1645                                 # @xxxsection A
1646                                 ## also: remove commas and quotes
1647                                 ch.chtype = chunk_type(CSLINE)
1648                                 length, newi = getnextarg(length, buf, pp, i)
1649                                 afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
1650                                 if afternodenamecmd < 0:
1651                                         cp1 = crcopy(pp[i:newi])
1652                                         pp[i:newi] = [\
1653                                                   chunk(GROUP, ch.where, \
1654                                                   pp[i:newi])]
1655                                         length, newi = length - (newi-i) + 1, \
1656                                                   i+1
1657                                         text = flattext(buf, cp1)
1658                                         text = invent_node_names(text)
1659                                 else:
1660                                         length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
1661                                         cp1 = crcopy(pp[afternodenamecmd:endarg])
1662                                         del pp[newi:endarg]
1663                                         length = length - (endarg-newi)
1664
1665                                         pp[i:newi] = [\
1666                                                   chunk(GROUP, ch.where, \
1667                                                   pp[i:newi])]
1668                                         length, newi = length - (newi-i) + 1, \
1669                                                   i + 1
1670                                         text = flattext(buf, cp1)
1671                                 if text[-1] == '.':
1672                                         text = text[:-1]
1673 ##                              print 'FLATTEXT:', `text`
1674                                 if text in hist.nodenames:
1675                                         print 'WARNING: node name ' + `text` + ' already used'
1676                                         out.doublenodes.append(text)
1677                                 else:
1678                                         hist.nodenames.append(text)
1679                                 text = rm_commas_etc(text)
1680                                 pp[i-1:i-1] = [\
1681                                           chunk(CSLINE, ch.where, 'node'), \
1682                                           chunk(GROUP, ch.where, [\
1683                                           chunk(PLAIN, ch.where, text+', , ,')\
1684                                           ])]
1685                                 i, length = newi+2, length+2
1686
1687                         elif s(buf,ch.data) == 'funcline':
1688                                 # fold it to a very short environment
1689                                 pp[i-1:i-1] = [\
1690                                           chunk(CSLINE, ch.where, 'end'), \
1691                                           chunk(GROUP, ch.where, [\
1692                                           chunk(PLAIN, ch.where, hist.command)])]
1693                                 i, length = i+2, length+2
1694                                 length, i = do_funcdesc(length, buf, pp, i)
1695
1696                         elif s(buf,ch.data) == 'dataline':
1697                                 pp[i-1:i-1] = [\
1698                                           chunk(CSLINE, ch.where, 'end'), \
1699                                           chunk(GROUP, ch.where, [\
1700                                           chunk(PLAIN, ch.where, hist.command)])]
1701                                 i, length = i+2, length+2
1702                                 length, i = do_datadesc(length, buf, pp, i)
1703
1704                         elif s(buf,ch.data) == 'excline':
1705                                 pp[i-1:i-1] = [\
1706                                           chunk(CSLINE, ch.where, 'end'), \
1707                                           chunk(GROUP, ch.where, [\
1708                                           chunk(PLAIN, ch.where, hist.command)])]
1709                                 i, length = i+2, length+2
1710                                 length, i = do_excdesc(length, buf, pp, i)
1711
1712
1713                         elif s(buf, ch.data) == 'index':
1714                                 #\index{A} --->
1715                                 # @cindex A
1716                                 ch.chtype = chunk_type(CSLINE)
1717                                 ch.data = 'cindex'
1718                                 length, newi = getnextarg(length, buf, pp, i)
1719
1720                                 ingroupch = pp[i:newi]
1721                                 del pp[i:newi]
1722                                 length = length - (newi-i)
1723                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1724                                 length, i = length+1, i+1
1725
1726                         elif s(buf, ch.data) == 'bifuncindex':
1727                                 ch.chtype = chunk_type(CSLINE)
1728                                 ch.data = 'findex'
1729                                 length, newi = getnextarg(length, buf, pp, i)
1730                                 ingroupch = pp[i:newi]
1731                                 del pp[i:newi]
1732                                 length = length - (newi-i)
1733
1734                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1735                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1736                                 ingroupch.append(chunk(GROUP, ch.where, [\
1737                                           chunk(PLAIN, ch.where, \
1738                                           '(built-in function)')]))
1739
1740                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1741                                 length, i = length+1, i+1
1742
1743
1744                         elif s(buf, ch.data) == 'opindex':
1745                                 ch.chtype = chunk_type(CSLINE)
1746                                 ch.data = 'findex'
1747                                 length, newi = getnextarg(length, buf, pp, i)
1748                                 ingroupch = pp[i:newi]
1749                                 del pp[i:newi]
1750                                 length = length - (newi-i)
1751
1752                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1753                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1754                                 ingroupch.append(chunk(GROUP, ch.where, [\
1755                                           chunk(PLAIN, ch.where, \
1756                                           '(operator)')]))
1757
1758                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1759                                 length, i = length+1, i+1
1760
1761
1762                         elif s(buf, ch.data) == 'bimodindex':
1763                                 ch.chtype = chunk_type(CSLINE)
1764                                 ch.data = 'pindex'
1765                                 length, newi = getnextarg(length, buf, pp, i)
1766                                 ingroupch = pp[i:newi]
1767                                 del pp[i:newi]
1768                                 length = length - (newi-i)
1769
1770                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1771                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1772                                 ingroupch.append(chunk(GROUP, ch.where, [\
1773                                           chunk(PLAIN, ch.where, \
1774                                           '(built-in)')]))
1775
1776                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1777                                 length, i = length+1, i+1
1778
1779                         elif s(buf, ch.data) == 'sectcode':
1780                                 ch.data = 'code'
1781
1782
1783                         elif s(buf, ch.data) == 'stmodindex':
1784                                 ch.chtype = chunk_type(CSLINE)
1785                                 # use the program index as module index
1786                                 ch.data = 'pindex'
1787                                 length, newi = getnextarg(length, buf, pp, i)
1788                                 ingroupch = pp[i:newi]
1789                                 del pp[i:newi]
1790                                 length = length - (newi-i)
1791
1792                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1793                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1794                                 ingroupch.append(chunk(GROUP, ch.where, [\
1795                                           chunk(PLAIN, ch.where, \
1796                                           '(standard)')]))
1797
1798                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1799                                 length, i = length+1, i+1
1800
1801
1802                         elif s(buf, ch.data) == 'stindex':
1803                                 # XXX must actually go to newindex st
1804                                 wh = ch.where
1805                                 ch.chtype = chunk_type(CSLINE)
1806                                 ch.data = 'cindex'
1807                                 length, newi = getnextarg(length, buf, pp, i)
1808                                 ingroupch = [chunk(CSNAME, wh, 'code'), \
1809                                           chunk(GROUP, wh, pp[i:newi])]
1810
1811                                 del pp[i:newi]
1812                                 length = length - (newi-i)
1813
1814                                 t = ingroupch[:]
1815                                 t.append(chunk(PLAIN, wh, ' statement'))
1816
1817                                 pp.insert(i, chunk(GROUP, wh, t))
1818                                 i, length = i+1, length+1
1819
1820                                 pp.insert(i, chunk(CSLINE, wh, 'cindex'))
1821                                 i, length = i+1, length+1
1822
1823                                 t = ingroupch[:]
1824                                 t.insert(0, chunk(PLAIN, wh, 'statement, '))
1825
1826                                 pp.insert(i, chunk(GROUP, wh, t))
1827                                 i, length = i+1, length+1
1828
1829
1830                         elif s(buf, ch.data) == 'indexii':
1831                                 #\indexii{A}{B} --->
1832                                 # @cindex A B
1833                                 # @cindex B, A
1834                                 length, newi = getnextarg(length, buf, pp, i)
1835                                 cp11 = pp[i:newi]
1836                                 cp21 = crcopy(pp[i:newi])
1837                                 del pp[i:newi]
1838                                 length = length - (newi-i)
1839                                 length, newi = getnextarg(length, buf, pp, i)
1840                                 cp12 = pp[i:newi]
1841                                 cp22 = crcopy(pp[i:newi])
1842                                 del pp[i:newi]
1843                                 length = length - (newi-i)
1844
1845                                 ch.chtype = chunk_type(CSLINE)
1846                                 ch.data = 'cindex'
1847                                 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1848                                           chunk(PLAIN, ch.where, ' ')] + cp12))
1849                                 i, length = i+1, length+1
1850                                 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1851                                           chunk(GROUP, ch.where, cp22 + [\
1852                                           chunk(PLAIN, ch.where, ', ')]+ cp21)]
1853                                 i, length = i+2, length+2
1854
1855                         elif s(buf, ch.data) == 'indexiii':
1856                                 length, newi = getnextarg(length, buf, pp, i)
1857                                 cp11 = pp[i:newi]
1858                                 cp21 = crcopy(pp[i:newi])
1859                                 cp31 = crcopy(pp[i:newi])
1860                                 del pp[i:newi]
1861                                 length = length - (newi-i)
1862                                 length, newi = getnextarg(length, buf, pp, i)
1863                                 cp12 = pp[i:newi]
1864                                 cp22 = crcopy(pp[i:newi])
1865                                 cp32 = crcopy(pp[i:newi])
1866                                 del pp[i:newi]
1867                                 length = length - (newi-i)
1868                                 length, newi = getnextarg(length, buf, pp, i)
1869                                 cp13 = pp[i:newi]
1870                                 cp23 = crcopy(pp[i:newi])
1871                                 cp33 = crcopy(pp[i:newi])
1872                                 del pp[i:newi]
1873                                 length = length - (newi-i)
1874
1875                                 ch.chtype = chunk_type(CSLINE)
1876                                 ch.data = 'cindex'
1877                                 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1878                                           chunk(PLAIN, ch.where, ' ')] + cp12 \
1879                                           + [chunk(PLAIN, ch.where, ' ')] \
1880                                           + cp13))
1881                                 i, length = i+1, length+1
1882                                 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1883                                           chunk(GROUP, ch.where, cp22 + [\
1884                                           chunk(PLAIN, ch.where, ' ')]+ cp23\
1885                                           + [chunk(PLAIN, ch.where, ', ')] +\
1886                                           cp21)]
1887                                 i, length = i+2, length+2
1888                                 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1889                                           chunk(GROUP, ch.where, cp33 + [\
1890                                           chunk(PLAIN, ch.where, ', ')]+ cp31\
1891                                           + [chunk(PLAIN, ch.where, ' ')] +\
1892                                           cp32)]
1893                                 i, length = i+2, length+2
1894
1895
1896                         elif s(buf, ch.data) == 'indexiv':
1897                                 length, newi = getnextarg(length, buf, pp, i)
1898                                 cp11 = pp[i:newi]
1899                                 cp21 = crcopy(pp[i:newi])
1900                                 cp31 = crcopy(pp[i:newi])
1901                                 cp41 = crcopy(pp[i:newi])
1902                                 del pp[i:newi]
1903                                 length = length - (newi-i)
1904                                 length, newi = getnextarg(length, buf, pp, i)
1905                                 cp12 = pp[i:newi]
1906                                 cp22 = crcopy(pp[i:newi])
1907                                 cp32 = crcopy(pp[i:newi])
1908                                 cp42 = crcopy(pp[i:newi])
1909                                 del pp[i:newi]
1910                                 length = length - (newi-i)
1911                                 length, newi = getnextarg(length, buf, pp, i)
1912                                 cp13 = pp[i:newi]
1913                                 cp23 = crcopy(pp[i:newi])
1914                                 cp33 = crcopy(pp[i:newi])
1915                                 cp43 = crcopy(pp[i:newi])
1916                                 del pp[i:newi]
1917                                 length = length - (newi-i)
1918                                 length, newi = getnextarg(length, buf, pp, i)
1919                                 cp14 = pp[i:newi]
1920                                 cp24 = crcopy(pp[i:newi])
1921                                 cp34 = crcopy(pp[i:newi])
1922                                 cp44 = crcopy(pp[i:newi])
1923                                 del pp[i:newi]
1924                                 length = length - (newi-i)
1925
1926                                 ch.chtype = chunk_type(CSLINE)
1927                                 ch.data = 'cindex'
1928                                 ingroupch = cp11 + \
1929                                           spacech + cp12 + \
1930                                           spacech + cp13 + \
1931                                           spacech + cp14
1932                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1933                                 i, length = i+1, length+1
1934                                 ingroupch = cp22 + \
1935                                           spacech + cp23 + \
1936                                           spacech + cp24 + \
1937                                           commach + cp21
1938                                 pp[i:i] = cindexch + [\
1939                                           chunk(GROUP, ch.where, ingroupch)]
1940                                 i, length = i+2, length+2
1941                                 ingroupch = cp33 + \
1942                                           spacech + cp34 + \
1943                                           commach + cp31 + \
1944                                           spacech + cp32
1945                                 pp[i:i] = cindexch + [\
1946                                           chunk(GROUP, ch.where, ingroupch)]
1947                                 i, length = i+2, length+2
1948                                 ingroupch = cp44 + \
1949                                           commach + cp41 + \
1950                                           spacech + cp42 + \
1951                                           spacech + cp43
1952                                 pp[i:i] = cindexch + [\
1953                                           chunk(GROUP, ch.where, ingroupch)]
1954                                 i, length = i+2, length+2
1955
1956
1957
1958                         else:
1959                                 print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
1960
1961
1962
# Matches the characters '@', '{' and '}'; dumpit() writes an extra '@' in
# front of each of them when it outputs the text of a PLAIN chunk.
re_atsign = regex.compile('[@{}]')
# Used by dumpit() to locate newlines in the dumped argument of a CSLINE
# chunk, which are then replaced by blanks.
re_newline = regex.compile('\n')
1965
1966 def dumpit(buf, wm, pp):
1967
1968         global out
1969
1970         i, length = 0, len(pp)
1971
1972         addspace = 0
1973
1974         while 1:
1975                 if len(pp) != length:
1976                         raise 'FATAL', 'inconsistent length'
1977                 if i == length:
1978                         break
1979                 ch = pp[i]
1980                 i = i + 1
1981
1982                 if addspace:
1983                         dospace = 1
1984                         addspace = 0
1985                 else:
1986                         dospace = 0
1987
1988                 if ch.chtype == chunk_type(CSNAME):
1989                         wm('@' + s(buf, ch.data))
1990                         if s(buf, ch.data) == 'node' and \
1991                                   pp[i].chtype == chunk_type(PLAIN) and \
1992                                   s(buf, pp[i].data) in out.doublenodes:
1993                                 ##XXX doesnt work yet??
1994                                 wm(' ZZZ-' + zfill(`i`, 4))
1995                         if s(buf, ch.data)[0] in string.letters:
1996                                 addspace = 1
1997                 elif ch.chtype == chunk_type(PLAIN):
1998                         if dospace and s(buf, ch.data) not in (' ', '\t'):
1999                                 wm(' ')
2000                         text = s(buf, ch.data)
2001                         while 1:
2002                                 pos = re_atsign.search(text)
2003                                 if pos < 0:
2004                                         break
2005                                 wm(text[:pos] + '@' + text[pos])
2006                                 text = text[pos+1:]
2007                         wm(text)
2008                 elif ch.chtype == chunk_type(GROUP):
2009                         wm('{')
2010                         dumpit(buf, wm, ch.data)
2011                         wm('}')
2012                 elif ch.chtype == chunk_type(DENDLINE):
2013                         wm('\n\n')
2014                         while i != length and pp[i].chtype in \
2015                                   (chunk_type(DENDLINE), chunk_type(ENDLINE)):
2016                                 i = i + 1
2017                 elif ch.chtype == chunk_type(OTHER):
2018                         wm(s(buf, ch.data))
2019                 elif ch.chtype == chunk_type(ACTIVE):
2020                         wm(s(buf, ch.data))
2021                 elif ch.chtype == chunk_type(ENDLINE):
2022                         wm('\n')
2023                 elif ch.chtype == chunk_type(CSLINE):
2024                         if i >= 2 and pp[i-2].chtype not in \
2025                                   (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2026                                   and (pp[i-2].chtype != chunk_type(PLAIN) \
2027                                   or s(buf, pp[i-2].data)[-1] != '\n'):
2028
2029                                 wm('\n')
2030                         wm('@' + s(buf, ch.data))
2031                         if i == length:
2032                                 raise error, 'CSLINE expected another chunk'
2033                         if pp[i].chtype != chunk_type(GROUP):
2034                                 raise error, 'CSLINE expected GROUP'
2035                         if type(pp[i].data) != type([]):
2036                                 raise error, 'GROUP chould contain []-data'
2037
2038                         wobj = Wobj().init()
2039                         dumpit(buf, wobj.write, pp[i].data)
2040                         i = i + 1
2041                         text = wobj.data
2042                         del wobj
2043                         if text:
2044                                 wm(' ')
2045                                 while 1:
2046                                         pos = re_newline.search(text)
2047                                         if pos < 0:
2048                                                 break
2049                                         print 'WARNING: found newline in csline arg'
2050                                         wm(text[:pos] + ' ')
2051                                         text = text[pos+1:]
2052                                 wm(text)
2053                         if i >= length or \
2054                                   pp[i].chtype not in (chunk_type(CSLINE), \
2055                                   chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2056                                   and (pp[i].chtype != chunk_type(PLAIN) \
2057                                   or s(buf, pp[i].data)[0] != '\n'):
2058                                 wm('\n')
2059
2060                 elif ch.chtype == chunk_type(COMMENT):
2061 ##                      print 'COMMENT: previous chunk =', pp[i-2]
2062                         if pp[i-2].chtype == chunk_type(PLAIN):
2063                                 print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
2064                         if s(buf, ch.data) and \
2065                                   regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
2066                                 if i >= 2 and pp[i-2].chtype not in \
2067                                           (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2068                                           and not (pp[i-2].chtype == chunk_type(PLAIN) \
2069                                           and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
2070                                         print 'ADDING NEWLINE'
2071                                         wm('\n')
2072                                 wm('@c ' + s(buf, ch.data))
2073                 elif ch.chtype == chunk_type(IGNORE):
2074                         pass
2075                 else:
2076                         try:
2077                                 str = `s(buf, ch.data)`
2078                         except TypeError:
2079                                 str = `ch.data`
2080                         if len(str) > 400:
2081                                 str = str[:400] + '...'
2082                         print 'warning:', ch.chtype, 'not handled, data ' + str
2083
2084
2085
2086 def main():
2087         outfile = None
2088         headerfile = 'texipre.dat'
2089         trailerfile = 'texipost.dat'
2090
2091         try:
2092                 opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
2093         except getopt.error:
2094                 args = []
2095
2096         if not args:
2097                 print 'usage: partparse [-o outfile] [-h headerfile]',
2098                 print '[-t trailerfile] file ...'
2099                 sys.exit(2)
2100
2101         for opt, arg in opts:
2102                 if opt == '-o': outfile = arg
2103                 if opt == '-h': headerfile = arg
2104                 if opt == '-t': trailerfile = arg
2105
2106         if not outfile:
2107                 root, ext = os.path.splitext(args[0])
2108                 outfile = root + '.texi'
2109
2110         if outfile in args:
2111                 print 'will not overwrite input file', outfile
2112                 sys.exit(2)
2113
2114         outf = open(outfile, 'w')
2115         outf.write(open(headerfile, 'r').read())
2116
2117         for file in args:
2118                 if len(args) > 1: print '='*20, file, '='*20
2119                 buf = open(file, 'r').read()
2120                 w, pp = parseit(buf)
2121                 startchange()
2122                 changeit(buf, pp)
2123                 dumpit(buf, outf.write, pp)
2124
2125         outf.write(open(trailerfile, 'r').read())
2126
2127         outf.close()
2128
# Executed unconditionally: there is no __name__ test here, so the
# conversion runs whenever this file is executed or imported.
main()