Doc/partparse.py

   1 #
   2 # partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
   3 #     and generate texinfo source.
   4 #
   5 # This is *not* a good example of good programming practices. In fact, this
   6 #     file could use a complete rewrite, in order to become faster, more
   7 #     easily extensible and maintainable.
   8 #
   9 # However, I added some comments in a few places for the pitiful person who
   10 #     would ever need to take a look into this file.
  11 #
  12 # Have I been clear enough??
  13 #
  14 # -jh
  15
  16
  17 import sys, string, regex, getopt, os
  18
  19 # Different parse modes for phase 1
  20 MODE_REGULAR = 0
  21 MODE_VERBATIM = 1
  22 MODE_CS_SCAN = 2
  23 MODE_COMMENT = 3
  24 MODE_MATH = 4
  25 MODE_DMATH = 5
  26 MODE_GOBBLEWHITE = 6
  27
  28 the_modes = MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, \
  29           MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE
  30
  31 # Show the neighbourhood of the scanned buffer
  32 def epsilon(buf, where):
  33         wmt, wpt = where - 10, where + 10
  34         if wmt < 0:
  35                 wmt = 0
  36         if wpt > len(buf):
  37                 wpt = len(buf)
  38         return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.'
  39
  40 # Should return the line number. never worked
  41 def lin():
  42         global lineno
  43         return ' Line ' + `lineno` + '.'
  44
  45 # Displays the recursion level.
  46 def lv(lvl):
  47         return ' Level ' + `lvl` + '.'
  48
  49 # Combine the three previous functions. Used often.
  50 def lle(lvl, buf, where):
  51         return lv(lvl) + lin() + epsilon(buf, where)
  52
  53
  54 # This class is only needed for _symbolic_ representation of the parse mode.
  55 class Mode:
  56         def init(self, arg):
  57                 if arg not in the_modes:
  58                         raise ValueError, 'mode not in the_modes'
  59                 self.mode = arg
  60                 return self
  61
  62         def __cmp__(self, other):
  63                 if type(self) != type(other):
  64                         other = mode(other)
  65                 return cmp(self.mode, other.mode)
  66
  67         def __repr__(self):
  68                 if self.mode == MODE_REGULAR:
  69                         return 'MODE_REGULAR'
  70                 elif self.mode == MODE_VERBATIM:
  71                         return 'MODE_VERBATIM'
  72                 elif self.mode == MODE_CS_SCAN:
  73                         return 'MODE_CS_SCAN'
  74                 elif self.mode == MODE_COMMENT:
  75                         return 'MODE_COMMENT'
  76                 elif self.mode == MODE_MATH:
  77                         return 'MODE_MATH'
  78                 elif self.mode == MODE_DMATH:
  79                         return 'MODE_DMATH'
  80                 elif self.mode == MODE_GOBBLEWHITE:
  81                         return 'MODE_GOBBLEWHITE'
  82                 else:
  83                         raise ValueError, 'mode not in the_modes'
  84
  85 # just a wrapper around a class initialisation
  86 def mode(arg):
  87         return Mode().init(arg)
  88
  89
  90 # After phase 1, the text consists of chunks, with a certain type
  91 # this type will be assigned to the chtype member of the chunk
  92 # the where-field contains the file position where this is found
  93 # and the data field contains (1): a tuple describing start- end end
  94 # positions of the substring (can be used as slice for the buf-variable),
  95 # (2) just a string, mostly generated by the changeit routine,
  96 # or (3) a list, describing a (recursive) subgroup of chunks
  97 PLAIN = 0                       # ASSUME PLAINTEXT, data = the text
  98 GROUP = 1                       # GROUP ({}), data = [chunk, chunk,..]
  99 CSNAME = 2                      # CONTROL SEQ TOKEN, data = the command
 100 COMMENT = 3                     # data is the actual comment
 101 DMATH = 4                       # DISPLAYMATH, data = [chunk, chunk,..]
 102 MATH = 5                        # MATH, see DISPLAYMATH
 103 OTHER = 6                       # CHAR WITH CATCODE OTHER, data = char
 104 ACTIVE = 7                      # ACTIVE CHAR
 105 GOBBLEDWHITE = 8                # Gobbled LWSP, after CSNAME
 106 ENDLINE = 9                     # END-OF-LINE, data = '\n'
 107 DENDLINE = 10                   # DOUBLE EOL, data='\n', indicates \par
 108 ENV = 11                        # LaTeX-environment
 109                                         # data =(envname,[ch,ch,ch,.])
 110 CSLINE = 12                     # for texi: next chunk will be one group
 111                                         # of args. Will be set all on 1 line
 112 IGNORE = 13                     # IGNORE this data
 113 ENDENV = 14                     # TEMP END OF GROUP INDICATOR
 114 IF = 15                         # IF-directive
 115                                         # data = (flag,negate,[ch, ch, ch,...])
 116 the_types = PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, \
 117           GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF
 118
 119 # class, just to display symbolic name
 120 class ChunkType:
 121         def init(self, chunk_type):
 122                 if chunk_type not in the_types:
 123                         raise 'ValueError', 'chunk_type not in the_types'
 124                 self.chunk_type = chunk_type
 125                 return self
 126
 127         def __cmp__(self, other):
 128                 if type(self) != type(other):
 129                         other = chunk_type(other)
 130                 return cmp(self.chunk_type, other.chunk_type)
 131
 132         def __repr__(self):
 133                 if self.chunk_type == PLAIN:
 134                         return 'PLAIN'
 135                 elif self.chunk_type == GROUP:
 136                         return 'GROUP'
 137                 elif self.chunk_type == CSNAME:
 138                         return 'CSNAME'
 139                 elif self.chunk_type == COMMENT:
 140                         return 'COMMENT'
 141                 elif self.chunk_type == DMATH:
 142                         return 'DMATH'
 143                 elif self.chunk_type == MATH:
 144                         return 'MATH'
 145                 elif self.chunk_type == OTHER:
 146                         return 'OTHER'
 147                 elif self.chunk_type == ACTIVE:
 148                         return 'ACTIVE'
 149                 elif self.chunk_type == GOBBLEDWHITE:
 150                         return 'GOBBLEDWHITE'
 151                 elif self.chunk_type == DENDLINE:
 152                         return 'DENDLINE'
 153                 elif self.chunk_type == ENDLINE:
 154                         return 'ENDLINE'
 155                 elif self.chunk_type == ENV:
 156                         return 'ENV'
 157                 elif self.chunk_type == CSLINE:
 158                         return 'CSLINE'
 159                 elif self.chunk_type == IGNORE:
 160                         return 'IGNORE'
 161                 elif self.chunk_type == ENDENV:
 162                         return 'ENDENV'
 163                 elif self.chunk_type == IF:
 164                         return 'IF'
 165                 else:
 166                         raise ValueError, 'chunk_type not in the_types'
 167
 168 # ...and the wrapper
 169 def chunk_type(type):
 170         return ChunkType().init(type)
 171
 172 # store a type object of the ChunkType-class-instance...
 173 chunk_type_type = type(chunk_type(0))
 174
 175 # this class contains a part of the parsed buffer
 176 class Chunk:
 177         def init(self, chtype, where, data):
 178                 if type(chtype) != chunk_type_type:
 179                         chtype = chunk_type(chtype)
 180                 self.chtype = chtype
 181                 if type(where) != type(0):
 182                         raise TypeError, '\'where\' is not a number'
 183                 self.where = where
 184                 self.data = data
 185                 ##print 'CHUNK', self
 186                 return self
 187
 188         def __repr__(self):
 189                 return 'chunk' + `self.chtype, self.where, self.data`
 190
 191 # and the wrapper
 192 def chunk(chtype, where, data):
 193          return Chunk().init(chtype, where, data)
 194
 195
 196
 197 error = 'partparse.error'
 198
 199 #
 200 # TeX's catcodes...
 201 #
 202 CC_ESCAPE = 0
 203 CC_LBRACE = 1
 204 CC_RBRACE = 2
 205 CC_MATHSHIFT = 3
 206 CC_ALIGNMENT = 4
 207 CC_ENDLINE = 5
 208 CC_PARAMETER = 6
 209 CC_SUPERSCRIPT = 7
 210 CC_SUBSCRIPT = 8
 211 CC_IGNORE = 9
 212 CC_WHITE = 10
 213 CC_LETTER = 11
 214 CC_OTHER = 12
 215 CC_ACTIVE = 13
 216 CC_COMMENT = 14
 217 CC_INVALID = 15
 218
 219 # and the names
 220 cc_names = [\
 221           'CC_ESCAPE', \
 222           'CC_LBRACE', \
 223           'CC_RBRACE', \
 224           'CC_MATHSHIFT', \
 225           'CC_ALIGNMENT', \
 226           'CC_ENDLINE', \
 227           'CC_PARAMETER', \
 228           'CC_SUPERSCRIPT', \
 229           'CC_SUBSCRIPT', \
 230           'CC_IGNORE', \
 231           'CC_WHITE', \
 232           'CC_LETTER', \
 233           'CC_OTHER', \
 234           'CC_ACTIVE', \
 235           'CC_COMMENT', \
 236           'CC_INVALID', \
 237           ]
 238
 239 # Show a list of catcode-name-symbols
 240 def pcl(codelist):
 241         result = ''
 242         for i in codelist:
 243                 result = result + cc_names[i] + ', '
 244         return '[' + result[:-2] + ']'
 245
 246 # the name of the catcode (ACTIVE, OTHER, etc.)
 247 def pc(code):
 248         return cc_names[code]
 249
 250
 251 # Which catcodes make the parser stop parsing regular plaintext
 252 regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, \
 253           CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, \
 254           CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]
 255
 256 # same for scanning a control sequence name
 257 csname_scancodes = [CC_LETTER]
 258
 259 # same for gobbling LWSP
 260 white_scancodes = [CC_WHITE]
 261 ##white_scancodes = [CC_WHITE, CC_ENDLINE]
 262
 263 # make a list of all catcode id's, except for catcode ``other''
 264 all_but_other_codes = range(16)
 265 del all_but_other_codes[CC_OTHER]
 266 ##print all_but_other_codes
 267
 268 # when does a comment end
 269 comment_stopcodes = [CC_ENDLINE]
 270
 271 # gather all characters together, specified by a list of catcodes
 272 def code2string(cc, codelist):
 273         ##print 'code2string: codelist = ' + pcl(codelist),
 274         result = ''
 275         for category in codelist:
 276                 if cc[category]:
 277                         result = result + cc[category]
 278         ##print 'result = ' + `result`
 279         return result
 280
 281 # automatically generate all characters of catcode other, being the
 282 # complement set in the ASCII range (128 characters)
 283 def make_other_codes(cc):
 284         otherchars = range(256)         # could be made 256, no problem
 285         for category in all_but_other_codes:
 286                 if cc[category]:
 287                         for c in cc[category]:
 288                                 otherchars[ord(c)] = None
 289         result = ''
 290         for i in otherchars:
 291                 if i != None:
 292                         result = result + chr(i)
 293         return result
 294
 295 # catcode dump (which characters have which catcodes).
 296 def dump_cc(name, cc):
 297         ##print '\t' + name
 298         ##print '=' * (8+len(name))
 299         if len(cc) != 16:
 300                 raise TypeError, 'cc not good cat class'
 301 ##      for i in range(16):
 302 ##              print pc(i) + '\t' + `cc[i]`
 303
 304
 305 # In the beginning,....
 306 epoch_cc = [None] * 16
 307 ##dump_cc('epoch_cc', epoch_cc)
 308
 309
 310 # INITEX
 311 initex_cc = epoch_cc[:]
 312 initex_cc[CC_ESCAPE] = '\\'
 313 initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
 314           '\n', '\0', ' '
 315 initex_cc[CC_LETTER] = string.uppercase + string.lowercase
 316 initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
 317 #initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
 318 ##dump_cc('initex_cc', initex_cc)
 319
 320
 321 # LPLAIN: LaTeX catcode setting (see lplain.tex)
 322 lplain_cc = initex_cc[:]
 323 lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
 324 lplain_cc[CC_MATHSHIFT] = '$'
 325 lplain_cc[CC_ALIGNMENT] = '&'
 326 lplain_cc[CC_PARAMETER] = '#'
 327 lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
 328 lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
 329 lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
 330 lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
 331 lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
 332 ##dump_cc('lplain_cc', lplain_cc)
 333
 334
 335 # Guido's LaTeX environment catcoded '_' as ``other''
 336 # my own purpose catlist
 337 my_cc = lplain_cc[:]
 338 my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
 339 my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'       # add it to OTHER list
 340 dump_cc('my_cc', my_cc)
 341
 342
 343
 344 # needed for un_re, my equivalent for regexp-quote in Emacs
 345 re_meaning = '\\[]^$'
 346
 347 def un_re(str):
 348         result = ''
 349         for i in str:
 350                 if i in re_meaning:
 351                         result = result + '\\'
 352                 result = result + i
 353         return result
 354
 355 # NOTE the negate ('^') operator in *some* of the regexps below
 356 def make_rc_regular(cc):
 357         # problems here if '[]' are included!!
 358         return regex.compile('[' + code2string(cc, regular_stopcodes) + ']')
 359
 360 def make_rc_cs_scan(cc):
 361         return regex.compile('[^' + code2string(cc, csname_scancodes) + ']')
 362
 363 def make_rc_comment(cc):
 364         return regex.compile('[' + code2string(cc, comment_stopcodes) + ']')
 365
 366 def make_rc_endwhite(cc):
 367         return regex.compile('[^' + code2string(cc, white_scancodes) + ']')
 368
 369
 370
 371 # regular: normal mode:
 372 rc_regular = make_rc_regular(my_cc)
 373
 374 # scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
 375 rc_cs_scan = make_rc_cs_scan(my_cc)
 376 rc_comment = make_rc_comment(my_cc)
 377 rc_endwhite = make_rc_endwhite(my_cc)
 378
 379
 380 # parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
 381 #     RECURSION-LEVEL will is incremented on entry.
 382 #     result contains the list of chunks returned
 383 #     together with this list, the buffer position is returned
 384
 385 #     RECURSION-LEVEL will be set to zero *again*, when recursively a
 386 #     {,D}MATH-mode scan has been enetered.
 387 #     This has been done in order to better check for environment-mismatches
 388
 389 def parseit(buf, *rest):
 390         global lineno
 391
 392         if len(rest) == 3:
 393                 parsemode, start, lvl = rest
 394         elif len(rest) == 2:
 395                 parsemode, start, lvl = rest + (0, )
 396         elif len(rest) == 1:
 397                 parsemode, start, lvl = rest + (0, 0)
 398         elif len(rest) == 0:
 399                 parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
 400         else:
 401                 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
 402         result = []
 403         end = len(buf)
 404         if lvl == 0 and parsemode == mode(MODE_REGULAR):
 405                 lineno = 1
 406         lvl = lvl + 1
 407
 408         ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
 409
 410         #
 411         # some of the more regular modes...
 412         #
 413
 414         if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
 415                 cstate = []
 416                 newpos = start
 417                 curpmode = parsemode
 418                 while 1:
 419                         where = newpos
 420                         #print '\tnew round: ' + epsilon(buf, where)
 421                         if where == end:
 422                                 if lvl > 1 or curpmode != mode(MODE_REGULAR):
 423                                         # not the way we started...
 424                                         raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
 425                                 # the real ending of lvl-1 parse
 426                                 return end, result
 427
 428                         pos = rc_regular.search(buf, where)
 429
 430                         if pos < 0:
 431                                 pos = end
 432
 433                         if pos != where:
 434                                 newpos, c = pos, chunk(PLAIN, where, (where, pos))
 435                                 result.append(c)
 436                                 continue
 437
 438
 439                         #
 440                         # ok, pos == where and pos != end
 441                         #
 442                         foundchar = buf[where]
 443                         if foundchar in my_cc[CC_LBRACE]:
 444                                 # recursive subgroup parse...
 445                                 newpos, data = parseit(buf, curpmode, where+1, lvl)
 446                                 result.append(chunk(GROUP, where, data))
 447
 448                         elif foundchar in my_cc[CC_RBRACE]:
 449                                 if lvl <= 1:
 450                                         raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
 451                                 if  lvl == 1 and mode != mode(MODE_REGULAR):
 452                                         raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
 453                                 return where + 1, result
 454
 455                         elif foundchar in my_cc[CC_ESCAPE]:
 456                                 #
 457                                 # call the routine that actually deals with
 458                                 #     this problem. If do_ret is None, than
 459                                 #     return the value of do_ret
 460                                 #
 461                                 # Note that handle_cs might call this routine
 462                                 #     recursively again...
 463                                 #
 464                                 do_ret, newpos = handlecs(buf, where, \
 465                                           curpmode, lvl, result, end)
 466                                 if do_ret != None:
 467                                         return do_ret
 468
 469                         elif foundchar in my_cc[CC_COMMENT]:
 470                                 newpos, data = parseit(buf, \
 471                                           mode(MODE_COMMENT), where+1, lvl)
 472                                 result.append(chunk(COMMENT, where, data))
 473
 474                         elif foundchar in my_cc[CC_MATHSHIFT]:
 475                                 # note that recursive calls to math-mode
 476                                 # scanning are called with recursion-level 0
 477                                 # again, in order to check for bad mathend
 478                                 #
 479                                 if where + 1 != end and \
 480                                           buf[where + 1] in \
 481                                           my_cc[CC_MATHSHIFT]:
 482                                         #
 483                                         # double mathshift, e.g. '$$'
 484                                         #
 485                                         if curpmode == mode(MODE_REGULAR):
 486                                                 newpos, data = parseit(buf, \
 487                                                           mode(MODE_DMATH), \
 488                                                           where+2, 0)
 489                                                 result.append(chunk(DMATH, \
 490                                                           where, data))
 491                                         elif curpmode == mode(MODE_MATH):
 492                                                 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
 493                                         elif lvl != 1:
 494                                                 raise error, 'bad mathend.' + \
 495                                                           lle(lvl, buf, where)
 496                                         else:
 497                                                 return where + 2, result
 498                                 else:
 499                                         #
 500                                         # single math shift, e.g. '$'
 501                                         #
 502                                         if curpmode == mode(MODE_REGULAR):
 503                                                 newpos, data = parseit(buf, \
 504                                                           mode(MODE_MATH), \
 505                                                           where+1, 0)
 506                                                 result.append(chunk(MATH, \
 507                                                           where, data))
 508                                         elif curpmode == mode(MODE_DMATH):
 509                                                 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
 510                                         elif lvl != 1:
 511                                                 raise error, 'bad mathend.' + \
 512                                                           lv(lvl, buf, where)
 513                                         else:
 514                                                 return where + 1, result
 515
 516                         elif foundchar in my_cc[CC_IGNORE]:
 517                                 print 'warning: ignored char', `foundchar`
 518                                 newpos = where + 1
 519
 520                         elif foundchar in my_cc[CC_ACTIVE]:
 521                                 result.append(chunk(ACTIVE, where, foundchar))
 522                                 newpos = where + 1
 523
 524                         elif foundchar in my_cc[CC_INVALID]:
 525                                 raise error, 'invalid char ' + `foundchar`
 526                                 newpos = where + 1
 527
 528                         elif foundchar in my_cc[CC_ENDLINE]:
 529                                 #
 530                                 # after an end of line, eat the rest of
 531                                 # whitespace on the beginning of the next line
 532                                 # this is what LaTeX more or less does
 533                                 #
 534                                 # also, try to indicate double newlines (\par)
 535                                 #
 536                                 lineno = lineno + 1
 537                                 savedwhere = where
 538                                 newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
 539                                 if newpos != end and buf[newpos] in \
 540                                           my_cc[CC_ENDLINE]:
 541                                         result.append(chunk(DENDLINE, \
 542                                                   savedwhere, foundchar))
 543                                 else:
 544                                         result.append(chunk(ENDLINE, \
 545                                                   savedwhere, foundchar))
 546                         else:
 547                                 result.append(chunk(OTHER, where, foundchar))
 548                                 newpos = where + 1
 549
 550         elif parsemode == mode(MODE_CS_SCAN):
 551                 #
 552                 # scan for a control sequence token. `\ape', `\nut' or `\%'
 553                 #
 554                 if start == end:
 555                         raise EOFError, 'can\'t find end of csname'
 556                 pos = rc_cs_scan.search(buf, start)
 557                 if pos < 0:
 558                         pos = end
 559                 if pos == start:
 560                         # first non-letter right where we started the search
 561                         # ---> the control sequence name consists of one single
 562                         # character. Also: don't eat white space...
 563                         if buf[pos] in my_cc[CC_ENDLINE]:
 564                                 lineno = lineno + 1
 565                         pos = pos + 1
 566                         return pos, (start, pos)
 567                 else:
 568                         spos = pos
 569                         if buf[pos] == '\n':
 570                                 lineno = lineno + 1
 571                                 spos = pos + 1
 572                         pos2, dummy = parseit(buf, \
 573                                   mode(MODE_GOBBLEWHITE), spos, lvl)
 574                         return pos2, (start, pos)
 575
 576         elif parsemode == mode(MODE_GOBBLEWHITE):
 577                 if start == end:
 578                         return start, ''
 579                 pos = rc_endwhite.search(buf, start)
 580                 if pos < 0:
 581                         pos = start
 582                 return pos, (start, pos)
 583
 584         elif parsemode == mode(MODE_COMMENT):
 585                 pos = rc_comment.search(buf, start)
 586                 lineno = lineno + 1
 587                 if pos < 0:
 588                         print 'no newline perhaps?'
 589                         raise EOFError, 'can\'t find end of comment'
 590                 pos = pos + 1
 591                 pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
 592                 return pos2, (start, pos)
 593
 594
 595         else:
 596                 raise error, 'Unknown mode (' + `parsemode` + ')'
 597
 598
 599 #moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
 600
 601 #boxcommands = 'mbox', 'fbox'
 602 #defcommands = 'def', 'newcommand'
 603
 604 endverbstr = '\\end{verbatim}'
 605
 606 re_endverb = regex.compile(un_re(endverbstr))
 607
 608 #
 609 # handlecs: helper function for parseit, for the special thing we might
 610 #     wanna do after certain command control sequences
 611 # returns: None or return_data, newpos
 612 #
 613 # in the latter case, the calling function is instructed to immediately
 614 # return with the data in return_data
 615 #
 616 def handlecs(buf, where, curpmode, lvl, result, end):
 617         global lineno
 618
 619         # get the control sequence name...
 620         newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
 621         saveddata = data
 622
 623         if s(buf, data) in ('begin', 'end'):
 624                 # skip the expected '{' and get the LaTeX-envname '}'
 625                 newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
 626                 if len(data) != 1:
 627                         raise error, 'expected 1 chunk of data.' + \
 628                                   lle(lvl, buf, where)
 629
 630                 # yucky, we've got an environment
 631                 envname = s(buf, data[0].data)
 632                 ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
 633                 if s(buf, saveddata) == 'begin' and envname == 'verbatim':
 634                         # verbatim deserves special treatment
 635                         pos = re_endverb.search(buf, newpos)
 636                         if pos < 0:
 637                                 raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
 638                         result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
 639                         newpos = pos + len(endverbstr)
 640
 641                 elif s(buf, saveddata) == 'begin':
 642                         # start parsing recursively... If that parse returns
 643                         # from an '\end{...}', then should the last item of
 644                         # the returned data be a string containing the ended
 645                         # environment
 646                         newpos, data = parseit(buf, curpmode, newpos, lvl)
 647                         if not data or type(data[-1]) != type(''):
 648                                 raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
 649                         retenv = data[-1]
 650                         del data[-1]
 651                         if retenv != envname:
 652                                 #[`retenv`, `envname`]
 653                                 raise error, 'environments do not match.' + \
 654                                           lle(lvl, buf, where) + \
 655                                           epsilon(buf, newpos)
 656                         result.append(chunk(ENV, where, (retenv, data)))
 657                 else:
 658                         # 'end'... append the environment name, as just
 659                         # pointed out, and order parsit to return...
 660                         result.append(envname)
 661                         ##print 'POINT of return: ' + epsilon(buf, newpos)
 662                         # the tuple will be returned by parseit
 663                         return (newpos, result), newpos
 664
 665         # end of \begin ... \end handling
 666
 667         elif s(buf, data)[0:2] == 'if':
 668                 # another scary monster: the 'if' directive
 669                 flag = s(buf, data)[2:]
 670
 671                 # recursively call parseit, just like environment above..
 672                 # the last item of data should contain the if-termination
 673                 # e.g., 'else' of 'fi'
 674                 newpos, data = parseit(buf, curpmode, newpos, lvl)
 675                 if not data or data[-1] not in ('else', 'fi'):
 676                         raise error, 'wrong if... termination' + \
 677                                   lle(lvl, buf, where) + epsilon(buf, newpos)
 678
 679                 ifterm = data[-1]
 680                 del data[-1]
 681                 # 0 means dont_negate flag
 682                 result.append(chunk(IF, where, (flag, 0, data)))
 683                 if ifterm == 'else':
 684                         # do the whole thing again, there is only one way
 685                         # to end this one, by 'fi'
 686                         newpos, data = parseit(buf, curpmode, newpos, lvl)
 687                         if not data or data[-1] not in ('fi', ):
 688                                 raise error, 'wrong if...else... termination' \
 689                                           + lle(lvl, buf, where) \
 690                                           + epsilon(buf, newpos)
 691
 692                         ifterm = data[-1]
 693                         del data[-1]
 694                         result.append(chunk(IF, where, (flag, 1, data)))
 695                 #done implicitely: return None, newpos
 696
 697         elif s(buf, data) in ('else', 'fi'):
 698                 result.append(s(buf, data))
 699                 # order calling party to return tuple
 700                 return (newpos, result), newpos
 701
 702         # end of \if, \else, ... \fi handling
 703
 704         elif s(buf, saveddata) == 'verb':
 705                 x2 = saveddata[1]
 706                 result.append(chunk(CSNAME, where, data))
 707                 if x2 == end:
 708                         raise error, 'premature end of command.' + lle(lvl, buf, where)
 709                 delimchar = buf[x2]
 710                 ##print 'VERB: delimchar ' + `delimchar`
 711                 pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
 712                 if pos < 0:
 713                         raise error, 'end of \'verb\' argument (' + \
 714                                   `delimchar` + ') not found.' + \
 715                                   lle(lvl, buf, where)
 716                 result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
 717                 newpos = pos + 1
 718         else:
 719                 result.append(chunk(CSNAME, where, data))
 720         return None, newpos
 721
# this is just a function to get the string value of data that is possibly
# a tuple: a string is returned as it is, a tuple (x1, x2) yields buf[x1:x2]
 723 def s(buf, data):
 724         if type(data) == type(''):
 725                 return data
 726         if len(data) != 2 or not (type(data[0]) == type(data[1]) == type(0)):
 727                 raise TypeError, 'expected tuple of 2 integers'
 728         x1, x2 = data
 729         return buf[x1:x2]
 730
 731
 732 ##length, data1, i = getnextarg(length, buf, pp, i + 1)
 733
 734 # make a deep-copy of some chunks
 735 def crcopy(r):
 736         result = []
 737         for x in r:
 738                 result.append(chunkcopy(x))
 739         return result
 740
 741
 742
 743 # copy a chunk, would better be a method of class Chunk...
 744 def chunkcopy(ch):
 745         if ch.chtype == chunk_type(GROUP):
 746                 listc = ch.data[:]
 747                 for i in range(len(listc)):
 748                         listc[i] = chunkcopy(listc[i])
 749                 return chunk(GROUP, ch.where, listc)
 750         else:
 751                 return chunk(ch.chtype, ch.where, ch.data)
 752
 753
 754 # get next argument for TeX-macro, flatten a group (insert between)
 755 # or return Command Sequence token, or give back one character
 756 def getnextarg(length, buf, pp, item):
 757
 758         ##wobj = Wobj().init()
 759         ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
 760         ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
 761
 762         while item < length and pp[item].chtype == chunk_type(ENDLINE):
 763                 del pp[item]
 764                 length = length - 1
 765         if item >= length:
 766                 raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
 767         if pp[item].chtype == chunk_type(GROUP):
 768                 newpp = pp[item].data
 769                 del pp[item]
 770                 length = length - 1
 771                 changeit(buf, newpp)
 772                 length = length + len(newpp)
 773                 pp[item:item] = newpp
 774                 item = item + len(newpp)
 775                 if len(newpp) < 10:
 776                         wobj = Wobj().init()
 777                         dumpit(buf, wobj.write, newpp)
 778                         ##print 'GETNEXTARG: inserted ' + `wobj.data`
 779                 return length, item
 780         elif pp[item].chtype == chunk_type(PLAIN):
 781                 #grab one char
 782                 print 'WARNING: grabbing one char'
 783                 if len(s(buf, pp[item].data)) > 1:
 784                         pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
 785                         item, length = item+1, length+1
 786                         pp[item].data = s(buf, pp[item].data)[1:]
 787                 else:
 788                         item = item+1
 789                 return length, item
 790         else:
 791                 ch = pp[item]
 792                 try:
 793                         str = `s(buf, ch.data)`
 794                 except TypeError:
 795                         str = `ch.data`
 796                         if len(str) > 400:
 797                                 str = str[:400] + '...'
 798                 print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
 799                 return length, item
 800
 801
# this one is needed to find the end of LaTeX's optional argument, like
# item[...]
# getoptarg() searches the text of plain chunks with it, looking for the
# closing square bracket
re_endopt = regex.compile(']')
 805
 806 # get a LaTeX-optional argument, you know, the square braces '[' and ']'
 807 def getoptarg(length, buf, pp, item):
 808
 809         wobj = Wobj().init()
 810         dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
 811         ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
 812
 813         if item >= length or \
 814                   pp[item].chtype != chunk_type(PLAIN) or \
 815                   s(buf, pp[item].data)[0] != '[':
 816                 return length, item
 817
 818         pp[item].data = s(buf, pp[item].data)[1:]
 819         if len(pp[item].data) == 0:
 820                 del pp[item]
 821                 length = length-1
 822
 823         while 1:
 824                 if item == length:
 825                         raise error, 'No end of optional arg found'
 826                 if pp[item].chtype == chunk_type(PLAIN):
 827                         text = s(buf, pp[item].data)
 828                         pos = re_endopt.search(text)
 829                         if pos >= 0:
 830                                 pp[item].data = text[:pos]
 831                                 if pos == 0:
 832                                         del pp[item]
 833                                         length = length-1
 834                                 else:
 835                                         item=item+1
 836                                 text = text[pos+1:]
 837
 838                                 while text and text[0] in ' \t':
 839                                         text = text[1:]
 840
 841                                 if text:
 842                                         pp.insert(item, chunk(PLAIN, 0, text))
 843                                         length = length + 1
 844                                 return length, item
 845
 846                 item = item+1
 847
 848
# Wobj just appends everything that is written to it to its ``data'' attribute
 850 class Wobj:
 851         def init(self):
 852                 self.data = ''
 853                 return self
 854         def write(self, data):
 855                 self.data = self.data + data
 856
# ignore these commands
# (flattext() drops them without printing its usual warning)
ignoredcommands = ('bcode', 'ecode')
# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX')
# \{ --> {,  \} --> }, etc
themselves = ('{', '}', '.', '@', ' ', '\n') + wordsselves
# these ones also themselves (see argargs macro in myformat.sty)
# flattext() only honours them while hist.inargs is set
inargsselves = (',', '[', ']', '(', ')')
# this is how *I* would show the difference between emph and strong
#  code 1 means: fold to uppercase
#  a tuple means: (text to put in front, text to put behind) the argument
markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'), \
          'strong': ('*', '*')}

# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}

# transparent for these commands
for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp',
            'file', 'r', 'i', 't')
 876
 877
 878 # try to remove macros and return flat text
def flattext(buf, pp):
        # Returns, as one string, what dumpit() writes for the chunks of pp
        # after the commands in it have been replaced by plain text or thrown
        # away.  All changes are made to a copy obtained from crcopy(), not to
        # the list that was passed in.  Calls getnextarg(), and itself
        # recursively for the argument of the markcmds commands.
        pp = crcopy(pp)
        ##print '---> FLATTEXT ' + `pp`
        wobj = Wobj().init()

        i, length = 0, len(pp)
        while 1:
                # length is kept up to date by hand below; check the books
                if len(pp) != length:
                        raise 'FATAL', 'inconsistent length'
                if i >= length:
                        break
                # from here on, ch is pp[i-1] and i points at the next chunk
                ch = pp[i]
                i = i+1
                if ch.chtype == chunk_type(PLAIN):
                        pass
                elif ch.chtype == chunk_type(CSNAME):
                        if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
                                # the command name is its own plain text
                                ch.chtype = chunk_type(PLAIN)
                        elif s(buf, ch.data) == 'e':
                                # the command e becomes a literal backslash
                                ch.chtype = chunk_type(PLAIN)
                                ch.data = '\\'
                        elif len(s(buf, ch.data)) == 1 \
                                  and s(buf, ch.data) in onlylatexspecial:
                                ch.chtype = chunk_type(PLAIN)
                                # if it is followed by an empty group,
                                # remove that group, it was needed for
                                # a true space
                                if i < length \
                                          and pp[i].chtype==chunk_type(GROUP) \
                                          and len(pp[i].data) == 0:
                                        del pp[i]
                                        length = length-1

                        elif s(buf, ch.data) in markcmds.keys():
                                # flatten the argument of the command, take
                                # its chunks out of pp and turn the command
                                # chunk itself into the decorated text
                                length, newi = getnextarg(length, buf, pp, i)
                                str = flattext(buf, pp[i:newi])
                                del pp[i:newi]
                                length = length - (newi - i)
                                ch.chtype = chunk_type(PLAIN)
                                markcmd = s(buf, ch.data)
                                x = markcmds[markcmd]
                                if type(x) == type(()):
                                        pre, after = x
                                        str = pre+str+after
                                elif x == 1:
                                        str = string.upper(str)
                                else:
                                        raise 'FATAL', 'corrupt markcmds'
                                ch.data = str
                        else:
                                # any other command is removed; its arguments,
                                # if any, stay behind in pp
                                if s(buf, ch.data) not in ignoredcommands:
                                        print 'WARNING: deleting command ' + `s(buf, ch.data)`
                                        print 'PP' + `pp[i-1]`
                                del pp[i-1]
                                i, length = i-1, length-1
                elif ch.chtype == chunk_type(GROUP):
                        # getnextarg() replaces the group by its contents;
                        # stepping i back makes the loop look at those
                        # contents next (the returned index is not used)
                        length, newi = getnextarg(length, buf, pp, i-1)
                        i = i-1
##                      str = flattext(buf, crcopy(pp[i-1:newi]))
##                      del pp[i:newi]
##                      length = length - (newi - i)
##                      ch.chtype = chunk_type(PLAIN)
##                      ch.data = str
                else:
                        # all other chunk types are left as they are
                        pass

        dumpit(buf, wobj.write, pp)
        ##print 'FLATTEXT: RETURNING ' + `wobj.data`
        return wobj.data
 948
# try to generate node names (a bit shorter than the chapter title)
# note that the \nodename command (see elsewhere) overrules these efforts
 951 def invent_node_names(text):
 952         words = string.split(text)
 953
 954         ##print 'WORDS ' + `words`
 955
 956         if len(words) == 2 \
 957                   and string.lower(words[0]) == 'built-in' \
 958                   and string.lower(words[1]) not in ('modules', 'functions'):
 959                 return words[1]
 960         if len(words) == 3 and string.lower(words[1]) == 'module':
 961                 return words[2]
 962         if len(words) == 3 and string.lower(words[1]) == 'object':
 963                 return string.join(words[0:2])
 964         if len(words) > 4 and string.lower(string.join(words[-4:])) == \
 965                   'methods and data attributes':
 966                 return string.join(words[:2])
 967         return text
 968
# the characters that rm_commas_etc() takes out of node names
re_commas_etc = regex.compile('[,`\'@{}]')

# a run of blanks and tabs, possibly an empty one; next_command_p() uses it
# to recognise plain chunks that contain nothing else
re_whitespace = regex.compile('[ \t]*')
 972
 973
 974 ##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
 975
 976 # look if the next non-white stuff is also a command, resulting in skipping
 977 # double endlines (DENDLINE) too, and thus omitting \par's
 978 # Sometimes this is too much, maybe consider DENDLINE's as stop
 979 def next_command_p(length, buf, pp, i, cmdname):
 980
 981         while 1:
 982                 if i >= len(pp):
 983                         break
 984                 ch = pp[i]
 985                 i = i+1
 986                 if ch.chtype == chunk_type(ENDLINE):
 987                         continue
 988                 if ch.chtype == chunk_type(DENDLINE):
 989                         continue
 990                 if ch.chtype == chunk_type(PLAIN):
 991                         if re_whitespace.search(s(buf, ch.data)) == 0 and \
 992                                   re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)):
 993                                 continue
 994                         return -1
 995                 if ch.chtype == chunk_type(CSNAME):
 996                         if s(buf, ch.data) == cmdname:
 997                                 return i # _after_ the command
 998                         return -1
 999                 return -1
1000
1001
# things that are special to LaTeX, but not to texi..
# (flattext() turns a one-character command from this string into plain text)
onlylatexspecial = '_~^$#&%'

# empty class: its instances are only used to hang attributes on
class Struct: pass

# hist: state of the conversion; startchange() sets up the basic attributes,
#   other routines attach more (e.g. command, itemargmacro)
# out: the lists doublenodes and doublecindeces, also set up by startchange()
hist = Struct()
out = Struct()
1009
def startchange():
        # (re)initialise the bookkeeping kept in the global hist and out
        global hist, out

        # not inside any environment or list, and not inside macro arguments
        hist.inenv = []
        hist.enumeratenesting = 0
        hist.itemizenesting = 0
        hist.inargs = 0

        # nothing seen yet
        hist.nodenames = []
        hist.cindex = []

        # and nothing to report yet either
        out.doublecindeces = []
        out.doublenodes = []
1021
1022
# ready-made lists of one chunk each, all at position 0: a blank, a comma
# followed by a blank, and the line command cindex
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
# (changeit() picks one by means of hist.itemizenesting)
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
# (picked by means of hist.enumeratenesting)
enumeratesymbols = ['1', 'A', 'a']
1032
1033 ##
1034 ## \begin{ {func,data,exc}desc }{name}...
1035 ##   the resulting texi-code is dependent on the contents of indexsubitem
1036 ##
1037
# indexsubitem: `['XXX', 'function']`
1039 # funcdesc:
1040 #     deffn {`idxsi`} NAME (FUNCARGS)
1041
1042 # indexsubitem: `['XXX', 'method']`
1043 # funcdesc:
1044 #     defmethod {`idxsi[0]`} NAME (FUNCARGS)
1045
# indexsubitem: `['in', 'module', 'MODNAME']`
1047 # datadesc:
1048 #     defcv data {`idxsi[1:]`} NAME
1049 # excdesc:
1050 #     defcv exception {`idxsi[1:]`} NAME
1051 # funcdesc:
1052 #     deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
1053
# indexsubitem: `['OBJECT', 'attribute']`
1055 # datadesc
1056 #     defcv attribute {`OBJECT`} NAME
1057
1058
1059 ## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1060 ##   or \funcline{NAME}{ARGS}
1061 ##
1062 def do_funcdesc(length, buf, pp, i):
1063         startpoint = i-1
1064         ch = pp[startpoint]
1065         wh = ch.where
1066         length, newi = getnextarg(length, buf, pp, i)
1067         funcname = chunk(GROUP, wh, pp[i:newi])
1068         del pp[i:newi]
1069         length = length - (newi-i)
1070         save = hist.inargs
1071         hist.inargs = 1
1072         length, newi = getnextarg(length, buf, pp, i)
1073         hist.inargs = save
1074         del save
1075         the_args = [chunk(PLAIN, wh, '()'[0])] + \
1076                   pp[i:newi] + \
1077                   [chunk(PLAIN, wh, '()'[1])]
1078         del pp[i:newi]
1079         length = length - (newi-i)
1080
1081         idxsi = hist.indexsubitem       # words
1082         command = ''
1083         cat_class = ''
1084         if idxsi and idxsi[-1] in ('method', 'protocol'):
1085                 command = 'defmethod'
1086                 cat_class = string.join(idxsi[:-1])
1087         elif len(idxsi) == 2 and idxsi[1] == 'function':
1088                 command = 'deffn'
1089                 cat_class = string.join(idxsi)
1090         elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1091                 command = 'deffn'
1092                 cat_class = 'function of ' + string.join(idxsi[1:])
1093
1094         if not command:
1095                 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1096
1097         ch.chtype = chunk_type(CSLINE)
1098         ch.data = command
1099
1100         cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1101         cslinearg.append(chunk(PLAIN, wh, ' '))
1102         cslinearg.append(funcname)
1103         cslinearg.append(chunk(PLAIN, wh, ' '))
1104         l = len(cslinearg)
1105         cslinearg[l:l] = the_args
1106
1107         pp.insert(i, chunk(GROUP, wh, cslinearg))
1108         i, length = i+1, length+1
1109         hist.command = command
1110         return length, i
1111
1112
1113 ## this routine will be called on \begin{excdesc}{NAME}
1114 ## or \excline{NAME}
1115 ##
1116 def do_excdesc(length, buf, pp, i):
1117         startpoint = i-1
1118         ch = pp[startpoint]
1119         wh = ch.where
1120         length, newi = getnextarg(length, buf, pp, i)
1121         excname = chunk(GROUP, wh, pp[i:newi])
1122         del pp[i:newi]
1123         length = length - (newi-i)
1124
1125         idxsi = hist.indexsubitem       # words
1126         command = ''
1127         cat_class = ''
1128         class_class = ''
1129         if len(idxsi) == 2 and idxsi[1] == 'exception':
1130                 command = 'defvr'
1131                 cat_class = string.join(idxsi)
1132         elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1133                 command = 'defcv'
1134                 cat_class = 'exception'
1135                 class_class = string.join(idxsi[1:])
1136         elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
1137                 command = 'defcv'
1138                 cat_class = 'exception'
1139                 class_class = string.join(idxsi[2:])
1140
1141
1142         if not command:
1143                 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1144
1145         ch.chtype = chunk_type(CSLINE)
1146         ch.data = command
1147
1148         cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1149         cslinearg.append(chunk(PLAIN, wh, ' '))
1150         if class_class:
1151                 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1152                 cslinearg.append(chunk(PLAIN, wh, ' '))
1153         cslinearg.append(excname)
1154
1155         pp.insert(i, chunk(GROUP, wh, cslinearg))
1156         i, length = i+1, length+1
1157         hist.command = command
1158         return length, i
1159
1160 ## same for datadesc or dataline...
1161 def do_datadesc(length, buf, pp, i):
1162         startpoint = i-1
1163         ch = pp[startpoint]
1164         wh = ch.where
1165         length, newi = getnextarg(length, buf, pp, i)
1166         dataname = chunk(GROUP, wh, pp[i:newi])
1167         del pp[i:newi]
1168         length = length - (newi-i)
1169
1170         idxsi = hist.indexsubitem       # words
1171         command = ''
1172         cat_class = ''
1173         class_class = ''
1174         if idxsi[-1] in ('attribute', 'option'):
1175                 command = 'defcv'
1176                 cat_class = idxsi[-1]
1177                 class_class = string.join(idxsi[:-1])
1178         elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1179                 command = 'defcv'
1180                 cat_class = 'data'
1181                 class_class = string.join(idxsi[1:])
1182         elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
1183                 command = 'defcv'
1184                 cat_class = 'data'
1185                 class_class = string.join(idxsi[2:])
1186
1187
1188         if not command:
1189                 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1190
1191         ch.chtype = chunk_type(CSLINE)
1192         ch.data = command
1193
1194         cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1195         cslinearg.append(chunk(PLAIN, wh, ' '))
1196         if class_class:
1197                 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1198                 cslinearg.append(chunk(PLAIN, wh, ' '))
1199         cslinearg.append(dataname)
1200
1201         pp.insert(i, chunk(GROUP, wh, cslinearg))
1202         i, length = i+1, length+1
1203         hist.command = command
1204         return length, i
1205
1206
# regular indices: those that are not set in tt font by default....
# (a tuple of one element, hence the comma)
regindices = ('cindex', )
1209
1210 # remove illegal characters from node names
1211 def rm_commas_etc(text):
1212         result = ''
1213         changed = 0
1214         while 1:
1215                 pos = re_commas_etc.search(text)
1216                 if pos >= 0:
1217                         changed = 1
1218                         result = result + text[:pos]
1219                         text = text[pos+1:]
1220                 else:
1221                         result = result + text
1222                         break
1223         if changed:
1224                 print 'Warning: nodename changhed to ' + `result`
1225
1226         return result
1227
# boolean flags
# changeit() looks up the flag of an IF chunk here, to decide whether the
# data of that chunk is kept; a flag that is not in here raises an error
flags = {'texi': 1}
1230
1231
1232 ##
1233 ## changeit: the actual routine, that changes the contents of the parsed
1234 ##           chunks
1235 ##
1236
1237 def changeit(buf, pp):
1238         global onlylatexspecial, hist, out
1239
1240         i, length = 0, len(pp)
1241         while 1:
1242                 # sanity check: length should always equal len(pp)
1243                 if len(pp) != length:
1244                         raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
1245                 if i >= length:
1246                         break
1247                 ch = pp[i]
1248                 i = i + 1
1249
1250                 if type(ch) == type(''):
1251                         #normally, only chunks are present in pp,
1252                         # but in some cases, some extra info
1253                         # has been inserted, e.g., the \end{...} clauses
1254                         raise 'FATAL', 'got string, probably too many ' + `end`
1255
1256                 if ch.chtype == chunk_type(GROUP):
1257                         # check for {\em ...} constructs
1258                         if ch.data and \
1259                                   ch.data[0].chtype == chunk_type(CSNAME) and \
1260                                   s(buf, ch.data[0].data) in fontchanges.keys():
1261                                 k = s(buf, ch.data[0].data)
1262                                 del ch.data[0]
1263                                 pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
1264                                 length, i = length+1, i+1
1265
1266                         # recursively parse the contents of the group
1267                         changeit(buf, ch.data)
1268
1269                 elif ch.chtype == chunk_type(IF):
1270                         # \if...
1271                         flag, negate, data = ch.data
1272                         ##print 'IF: flag, negate = ' + `flag, negate`
1273                         if flag not in flags.keys():
1274                                 raise error, 'unknown flag ' + `flag`
1275
1276                         value = flags[flag]
1277                         if negate:
1278                                 value = (not value)
1279                         del pp[i-1]
1280                         length, i = length-1, i-1
1281                         if value:
1282                                 pp[i:i] = data
1283                                 length = length + len(data)
1284
1285
1286                 elif ch.chtype == chunk_type(ENV):
1287                         # \begin{...} ....
1288                         envname, data = ch.data
1289
1290                         #push this environment name on stack
1291                         hist.inenv.insert(0, envname)
1292
1293                         #append an endenv chunk after grouped data
1294                         data.append(chunk(ENDENV, ch.where, envname))
1295                         ##[`data`]
1296
1297                         #delete this object
1298                         del pp[i-1]
1299                         i, length = i-1, length-1
1300
1301                         #insert found data
1302                         pp[i:i] = data
1303                         length = length + len(data)
1304
1305                         if envname == 'verbatim':
1306                                 pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \
1307                                           chunk(GROUP, ch.where, [])]
1308                                 length, i = length+2, i+2
1309
1310                         elif envname == 'itemize':
1311                                 if hist.itemizenesting > len(itemizesymbols):
1312                                         raise error, 'too deep itemize nesting'
1313                                 ingroupch = [chunk(CSNAME, ch.where,\
1314                                           itemizesymbols[hist.itemizenesting])]
1315                                 hist.itemizenesting = hist.itemizenesting + 1
1316                                 pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\
1317                                           chunk(GROUP, ch.where, ingroupch)]
1318                                 length, i = length+2, i+2
1319
1320                         elif envname == 'enumerate':
1321                                 if hist.enumeratenesting > len(enumeratesymbols):
1322                                         raise error, 'too deep enumerate nesting'
1323                                 ingroupch = [chunk(PLAIN, ch.where,\
1324                                           enumeratesymbols[hist.enumeratenesting])]
1325                                 hist.enumeratenesting = hist.enumeratenesting + 1
1326                                 pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\
1327                                           chunk(GROUP, ch.where, ingroupch)]
1328                                 length, i = length+2, i+2
1329
1330                         elif envname == 'description':
1331                                 ingroupch = [chunk(CSNAME, ch.where, 'b')]
1332                                 pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \
1333                                           chunk(GROUP, ch.where, ingroupch)]
1334                                 length, i = length+2, i+2
1335
1336                         elif (envname == 'tableiii') or \
1337                              (envname == 'tableii'):
1338                                 if (envname == 'tableii'):
1339                                         ltable = 2
1340                                 else:
1341                                         ltable = 3
1342                                 wh = ch.where
1343                                 newcode = []
1344
1345                                 #delete tabular format description
1346                                 # e.g., {|l|c|l|}
1347                                 length, newi = getnextarg(length, buf, pp, i)
1348                                 del pp[i:newi]
1349                                 length = length - (newi-i)
1350
1351                                 newcode.append(chunk(CSLINE, wh, 'table'))
1352                                 ingroupch = [chunk(CSNAME, wh, 'asis')]
1353                                 newcode.append(chunk(GROUP, wh, ingroupch))
1354                                 newcode.append(chunk(CSLINE, wh, 'item'))
1355
1356                                 #get the name of macro for @item
1357                                 # e.g., {code}
1358                                 length, newi = getnextarg(length, buf, pp, i)
1359
1360                                 if newi-i != 1:
1361                                         raise error, 'Sorry, expected 1 chunk argument'
1362                                 if pp[i].chtype != chunk_type(PLAIN):
1363                                         raise error, 'Sorry, expected plain text argument'
1364                                 hist.itemargmacro = s(buf, pp[i].data)
1365                                 del pp[i:newi]
1366                                 length = length - (newi-i)
1367
1368                                 itembody = []
1369                                 for count in range(ltable):
1370                                         length, newi = getnextarg(length, buf, pp, i)
1371                                         emphgroup = [\
1372                                                   chunk(CSNAME, wh, 'emph'), \
1373                                                   chunk(GROUP, 0, pp[i:newi])]
1374                                         del pp[i:newi]
1375                                         length = length - (newi-i)
1376                                         if count == 0:
1377                                                 itemarg = emphgroup
1378                                         elif count == ltable-1:
1379                                                 itembody = itembody + \
1380                                                           [chunk(PLAIN, wh, '  ---  ')] + \
1381                                                           emphgroup
1382                                         else:
1383                                                 itembody = emphgroup
1384                                 newcode.append(chunk(GROUP, wh, itemarg))
1385                                 newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
1386                                 pp[i:i] = newcode
1387                                 l = len(newcode)
1388                                 length, i = length+l, i+l
1389                                 del newcode, l
1390
1391                                 if length != len(pp):
1392                                         raise 'STILL, SOMETHING wrong', `i`
1393
1394
1395                         elif envname == 'funcdesc':
1396                                 pp.insert(i, chunk(PLAIN, ch.where, ''))
1397                                 i, length = i+1, length+1
1398                                 length, i = do_funcdesc(length, buf, pp, i)
1399
1400                         elif envname == 'excdesc':
1401                                 pp.insert(i, chunk(PLAIN, ch.where, ''))
1402                                 i, length = i+1, length+1
1403                                 length, i = do_excdesc(length, buf, pp, i)
1404
1405                         elif envname == 'datadesc':
1406                                 pp.insert(i, chunk(PLAIN, ch.where, ''))
1407                                 i, length = i+1, length+1
1408                                 length, i = do_datadesc(length, buf, pp, i)
1409
1410                         else:
1411                                 print 'WARNING: don\'t know what to do with env ' + `envname`
1412
1413                 elif ch.chtype == chunk_type(ENDENV):
1414                         envname = ch.data
1415                         if envname != hist.inenv[0]:
1416                                 raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
1417                         del hist.inenv[0]
1418                         del pp[i-1]
1419                         i, length = i-1, length-1
1420
1421                         if envname == 'verbatim':
1422                                 pp[i:i] = [\
1423                                           chunk(CSLINE, ch.where, 'end'), \
1424                                           chunk(GROUP, ch.where, [\
1425                                           chunk(PLAIN, ch.where, 'example')])]
1426                                 i, length = i+2, length+2
1427                         elif envname == 'itemize':
1428                                 hist.itemizenesting = hist.itemizenesting - 1
1429                                 pp[i:i] = [\
1430                                           chunk(CSLINE, ch.where, 'end'), \
1431                                           chunk(GROUP, ch.where, [\
1432                                           chunk(PLAIN, ch.where, 'itemize')])]
1433                                 i, length = i+2, length+2
1434                         elif envname == 'enumerate':
1435                                 hist.enumeratenesting = hist.enumeratenesting-1
1436                                 pp[i:i] = [\
1437                                           chunk(CSLINE, ch.where, 'end'), \
1438                                           chunk(GROUP, ch.where, [\
1439                                           chunk(PLAIN, ch.where, 'enumerate')])]
1440                                 i, length = i+2, length+2
1441                         elif envname == 'description':
1442                                 pp[i:i] = [\
1443                                           chunk(CSLINE, ch.where, 'end'), \
1444                                           chunk(GROUP, ch.where, [\
1445                                           chunk(PLAIN, ch.where, 'table')])]
1446                                 i, length = i+2, length+2
1447                         elif (envname == 'tableiii') or (envname == 'tableii'):
1448                                 pp[i:i] = [\
1449                                           chunk(CSLINE, ch.where, 'end'), \
1450                                           chunk(GROUP, ch.where, [\
1451                                           chunk(PLAIN, ch.where, 'table')])]
1452                                 i, length = i+2, length + 2
1453                                 pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
1454                                 i, length = i+1, length+1
1455
1456                         elif envname in ('funcdesc', 'excdesc', 'datadesc'):
1457                                 pp[i:i] = [\
1458                                           chunk(CSLINE, ch.where, 'end'), \
1459                                           chunk(GROUP, ch.where, [\
1460                                           chunk(PLAIN, ch.where, hist.command)])]
1461                                 i, length = i+2, length+2
1462                         else:
1463                                 print 'WARNING: ending env ' + `envname` + 'has no actions'
1464
1465                 elif ch.chtype == chunk_type(CSNAME):
1466                         # control name transformations
1467                         if s(buf, ch.data) == 'optional':
1468                                 pp[i-1].chtype = chunk_type (PLAIN)
1469                                 pp[i-1].data = '['
1470                                 if (i < length) and \
1471                                    (pp[i].chtype == chunk_type(GROUP)):
1472                                         cp=pp[i].data
1473                                         pp[i:i+1]=cp + [\
1474                                             chunk(PLAIN, ch.where, ']')]
1475                                         length = length+len(cp)
1476                         elif s(buf, ch.data) in ignoredcommands:
1477                                 del pp[i-1]
1478                                 i, length = i-1, length-1
1479                         elif s(buf, ch.data) == '@' and \
1480                                   i != length and \
1481                                   pp[i].chtype == chunk_type(PLAIN) and \
1482                                   s(buf, pp[i].data)[0] == '.':
1483                                 # \@. --> \. --> @.
1484                                 ch.data = '.'
1485                                 del pp[i]
1486                                 length = length-1
1487                         elif s(buf, ch.data) == '\\':
1488                                 # \\ --> \* --> @*
1489                                 ch.data = '*'
1490                         elif len(s(buf, ch.data)) == 1 and \
1491                                   s(buf, ch.data) in onlylatexspecial:
1492                                 ch.chtype = chunk_type(PLAIN)
1493                                 # check if such a command is followed by
1494                                 # an empty group: e.g., `\%{}'.  If so, remove
1495                                 # this empty group too
1496                                 if i < length and \
1497                                           pp[i].chtype == chunk_type(GROUP) \
1498                                           and len(pp[i].data) == 0:
1499                                         del pp[i]
1500                                         length = length-1
1501
1502                         elif hist.inargs and s(buf, ch.data) in inargsselves:
1503                                 # This is the special processing of the
1504                                 # arguments of the \begin{funcdesc}... or
1505                                 # \funcline... arguments
1506                                 # \, --> , \[ --> [, \] --> ]
1507                                 ch.chtype = chunk_type(PLAIN)
1508
1509                         elif s(buf, ch.data) == 'renewcommand':
1510                                 # \renewcommand{\indexsubitem}....
1511                                 i, length = i-1, length-1
1512                                 del pp[i]
1513                                 length, newi = getnextarg(length, buf, pp, i)
1514                                 if newi-i == 1 \
1515                                           and i < length \
1516                                           and pp[i].chtype == chunk_type(CSNAME) \
1517                                           and s(buf, pp[i].data) == 'indexsubitem':
1518                                         del pp[i:newi]
1519                                         length = length - (newi-i)
1520                                         length, newi = getnextarg(length, buf, pp, i)
1521                                         text = flattext(buf, pp[i:newi])
1522                                         if text[:1] != '(' or text[-1:] != ')':
1523                                                 raise error, 'expected indexsubitme enclosed in braces'
1524                                         words = string.split(text[1:-1])
1525                                         hist.indexsubitem = words
1526                                         del text, words
1527                                 else:
1528                                         print 'WARNING: renewcommand with unsupported arg removed'
1529                                 del pp[i:newi]
1530                                 length = length - (newi-i)
1531
1532                         elif s(buf, ch.data) == 'item':
1533                                 ch.chtype = chunk_type(CSLINE)
1534                                 length, newi = getoptarg(length, buf, pp, i)
1535                                 ingroupch = pp[i:newi]
1536                                 del pp[i:newi]
1537                                 length = length - (newi-i)
1538                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1539                                 i, length = i+1, length+1
1540
1541                         elif s(buf, ch.data) == 'ttindex':
1542                                 idxsi = hist.indexsubitem
1543
1544                                 cat_class = ''
1545                                 if len(idxsi) >= 2 and idxsi[1] in \
1546                                           ('method', 'function', 'protocol'):
1547                                         command = 'findex'
1548                                 elif len(idxsi) >= 2 and idxsi[1] in \
1549                                           ('exception', 'object'):
1550                                         command = 'vindex'
1551                                 else:
1552                                         print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
1553                                         command = 'cindex'
1554
1555                                 if not cat_class:
1556                                         cat_class = '('+string.join(idxsi)+')'
1557
1558                                 ch.chtype = chunk_type(CSLINE)
1559                                 ch.data = command
1560
1561                                 length, newi = getnextarg(length, buf, pp, i)
1562                                 arg = pp[i:newi]
1563                                 del pp[i:newi]
1564                                 length = length - (newi-i)
1565
1566                                 cat_arg = [chunk(PLAIN, ch.where, cat_class)]
1567
1568                                 # determine what should be set in roman, and
1569                                 # what in tt-font
1570                                 if command in regindices:
1571
1572                                         arg = [chunk(CSNAME, ch.where, 't'), \
1573                                                   chunk(GROUP, ch.where, arg)]
1574                                 else:
1575                                         cat_arg = [chunk(CSNAME, ch.where, 'r'), \
1576                                                   chunk(GROUP, ch.where, cat_arg)]
1577
1578                                 ingroupch = arg + \
1579                                           [chunk(PLAIN, ch.where, ' ')] + \
1580                                           cat_arg
1581
1582                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1583                                 length, i = length+1, i+1
1584
1585
1586                         elif s(buf, ch.data) == 'ldots':
1587                                 # \ldots --> \dots{} --> @dots{}
1588                                 ch.data = 'dots'
1589                                 if i == length \
1590                                           or pp[i].chtype != chunk_type(GROUP) \
1591                                           or pp[i].data != []:
1592                                         pp.insert(i, chunk(GROUP, ch.where, []))
1593                                         i, length = i+1, length+1
1594                         elif s(buf, ch.data) in wordsselves:
1595                                 # \UNIX --> UNIX
1596                                 ch.chtype = chunk_type(PLAIN)
1597                                 if i != length \
1598                                           and pp[i].chtype == chunk_type(GROUP) \
1599                                           and pp[i].data == []:
1600                                         del pp[i]
1601                                         length = length-1
1602                         elif s(buf, ch.data) in for_texi:
1603                                 pass
1604
1605                         elif s(buf, ch.data) == 'e':
1606                                 # \e --> \
1607                                 ch.data = '\\'
1608                                 ch.chtype = chunk_type(PLAIN)
1609                         elif (s(buf, ch.data) == 'lineiii') or\
1610                              (s(buf, ch.data) == 'lineii'):
1611                                 # This is the most tricky one
1612                                 # \lineiii{a1}{a2}[{a3}] -->
1613                                 # @item @<cts. of itemargmacro>{a1}
1614                                 #  a2 [ -- a3]
1615                                 #
1616                                 ##print 'LINEIIIIII!!!!!!!'
1617 ##                              wobj = Wobj().init()
1618 ##                              dumpit(buf, wobj.write, pp[i-1:i+5])
1619 ##                              print '--->' + wobj.data + '<----'
1620                                 if not hist.inenv:
1621                                         raise error, \
1622                                                   'no environment for lineiii'
1623                                 if (hist.inenv[0] != 'tableiii') and\
1624                                    (hist.inenv[0] != 'tableii'):
1625                                         raise error, \
1626                                                   'wrong command (' + \
1627                                                   s(buf, ch.data)+ \
1628                                                   ') in wrong environment (' \
1629                                                   + `hist.inenv[0]` + ')'
1630                                 ch.chtype = chunk_type(CSLINE)
1631                                 ch.data = 'item'
1632                                 length, newi = getnextarg(length, buf, pp, i)
1633                                 ingroupch = [chunk(CSNAME, 0, \
1634                                           hist.itemargmacro), \
1635                                           chunk(GROUP, 0, pp[i:newi])]
1636                                 del pp[i:newi]
1637                                 length = length - (newi-i)
1638 ##                              print 'ITEM ARG: --->',
1639 ##                              wobj = Wobj().init()
1640 ##                              dumpit(buf, wobj.write, ingroupch)
1641 ##                              print wobj.data, '<---'
1642                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1643                                 grouppos = i
1644                                 i, length = i+1, length+1
1645                                 length, i = getnextarg(length, buf, pp, i)
1646                                 length, newi = getnextarg(length, buf, pp, i)
1647                                 if newi > i:
1648                                         # we have a 3rd arg
1649                                         pp.insert(i, chunk(PLAIN, ch.where, '  ---  '))
1650                                         i = newi + 1
1651                                         length = length + 1
1652 ##                                      pp[grouppos].data = pp[grouppos].data \
1653 ##                                                + [chunk(PLAIN, ch.where, '  ')] \
1654 ##                                                + pp[i:newi]
1655 ##                                      del pp[i:newi]
1656 ##                                      length = length - (newi-i)
1657                                 if length != len(pp):
1658                                         raise 'IN LINEIII IS THE ERR', `i`
1659
1660                         elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
1661                                 #\xxxsection{A} ---->
1662                                 # @node A, , ,
1663                                 # @xxxsection A
1664                                 ## also: remove commas and quotes
1665                                 ch.chtype = chunk_type(CSLINE)
1666                                 length, newi = getnextarg(length, buf, pp, i)
1667                                 afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
1668                                 if afternodenamecmd < 0:
1669                                         cp1 = crcopy(pp[i:newi])
1670                                         pp[i:newi] = [\
1671                                                   chunk(GROUP, ch.where, \
1672                                                   pp[i:newi])]
1673                                         length, newi = length - (newi-i) + 1, \
1674                                                   i+1
1675                                         text = flattext(buf, cp1)
1676                                         text = invent_node_names(text)
1677                                 else:
1678                                         length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
1679                                         cp1 = crcopy(pp[afternodenamecmd:endarg])
1680                                         del pp[newi:endarg]
1681                                         length = length - (endarg-newi)
1682
1683                                         pp[i:newi] = [\
1684                                                   chunk(GROUP, ch.where, \
1685                                                   pp[i:newi])]
1686                                         length, newi = length - (newi-i) + 1, \
1687                                                   i + 1
1688                                         text = flattext(buf, cp1)
1689                                 if text[-1] == '.':
1690                                         text = text[:-1]
1691 ##                              print 'FLATTEXT:', `text`
1692                                 if text in hist.nodenames:
1693                                         print 'WARNING: node name ' + `text` + ' already used'
1694                                         out.doublenodes.append(text)
1695                                 else:
1696                                         hist.nodenames.append(text)
1697                                 text = rm_commas_etc(text)
1698                                 pp[i-1:i-1] = [\
1699                                           chunk(CSLINE, ch.where, 'node'), \
1700                                           chunk(GROUP, ch.where, [\
1701                                           chunk(PLAIN, ch.where, text+', , ,')\
1702                                           ])]
1703                                 i, length = newi+2, length+2
1704
1705                         elif s(buf,ch.data) == 'funcline':
1706                                 # fold it to a very short environment
1707                                 pp[i-1:i-1] = [\
1708                                           chunk(CSLINE, ch.where, 'end'), \
1709                                           chunk(GROUP, ch.where, [\
1710                                           chunk(PLAIN, ch.where, hist.command)])]
1711                                 i, length = i+2, length+2
1712                                 length, i = do_funcdesc(length, buf, pp, i)
1713
1714                         elif s(buf,ch.data) == 'dataline':
1715                                 pp[i-1:i-1] = [\
1716                                           chunk(CSLINE, ch.where, 'end'), \
1717                                           chunk(GROUP, ch.where, [\
1718                                           chunk(PLAIN, ch.where, hist.command)])]
1719                                 i, length = i+2, length+2
1720                                 length, i = do_datadesc(length, buf, pp, i)
1721
1722                         elif s(buf,ch.data) == 'excline':
1723                                 pp[i-1:i-1] = [\
1724                                           chunk(CSLINE, ch.where, 'end'), \
1725                                           chunk(GROUP, ch.where, [\
1726                                           chunk(PLAIN, ch.where, hist.command)])]
1727                                 i, length = i+2, length+2
1728                                 length, i = do_excdesc(length, buf, pp, i)
1729
1730
1731                         elif s(buf, ch.data) == 'index':
1732                                 #\index{A} --->
1733                                 # @cindex A
1734                                 ch.chtype = chunk_type(CSLINE)
1735                                 ch.data = 'cindex'
1736                                 length, newi = getnextarg(length, buf, pp, i)
1737
1738                                 ingroupch = pp[i:newi]
1739                                 del pp[i:newi]
1740                                 length = length - (newi-i)
1741                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1742                                 length, i = length+1, i+1
1743
1744                         elif s(buf, ch.data) == 'bifuncindex':
1745                                 ch.chtype = chunk_type(CSLINE)
1746                                 ch.data = 'findex'
1747                                 length, newi = getnextarg(length, buf, pp, i)
1748                                 ingroupch = pp[i:newi]
1749                                 del pp[i:newi]
1750                                 length = length - (newi-i)
1751
1752                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1753                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1754                                 ingroupch.append(chunk(GROUP, ch.where, [\
1755                                           chunk(PLAIN, ch.where, \
1756                                           '(built-in function)')]))
1757
1758                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1759                                 length, i = length+1, i+1
1760
1761
1762                         elif s(buf, ch.data) == 'obindex':
1763                                 ch.chtype = chunk_type(CSLINE)
1764                                 ch.data = 'findex'
1765                                 length, newi = getnextarg(length, buf, pp, i)
1766                                 ingroupch = pp[i:newi]
1767                                 del pp[i:newi]
1768                                 length = length - (newi-i)
1769
1770                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1771                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1772                                 ingroupch.append(chunk(GROUP, ch.where, [\
1773                                           chunk(PLAIN, ch.where, \
1774                                           '(object)')]))
1775
1776                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1777                                 length, i = length+1, i+1
1778
1779
1780                         elif s(buf, ch.data) == 'opindex':
1781                                 ch.chtype = chunk_type(CSLINE)
1782                                 ch.data = 'findex'
1783                                 length, newi = getnextarg(length, buf, pp, i)
1784                                 ingroupch = pp[i:newi]
1785                                 del pp[i:newi]
1786                                 length = length - (newi-i)
1787
1788                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1789                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1790                                 ingroupch.append(chunk(GROUP, ch.where, [\
1791                                           chunk(PLAIN, ch.where, \
1792                                           '(operator)')]))
1793
1794                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1795                                 length, i = length+1, i+1
1796
1797
1798                         elif s(buf, ch.data) == 'bimodindex':
1799                                 ch.chtype = chunk_type(CSLINE)
1800                                 ch.data = 'pindex'
1801                                 length, newi = getnextarg(length, buf, pp, i)
1802                                 ingroupch = pp[i:newi]
1803                                 del pp[i:newi]
1804                                 length = length - (newi-i)
1805
1806                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1807                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1808                                 ingroupch.append(chunk(GROUP, ch.where, [\
1809                                           chunk(PLAIN, ch.where, \
1810                                           '(built-in)')]))
1811
1812                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1813                                 length, i = length+1, i+1
1814
1815                         elif s(buf, ch.data) == 'sectcode':
1816                                 ch.data = 'code'
1817
1818
1819                         elif s(buf, ch.data) == 'stmodindex':
1820                                 ch.chtype = chunk_type(CSLINE)
1821                                 # use the program index as module index
1822                                 ch.data = 'pindex'
1823                                 length, newi = getnextarg(length, buf, pp, i)
1824                                 ingroupch = pp[i:newi]
1825                                 del pp[i:newi]
1826                                 length = length - (newi-i)
1827
1828                                 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1829                                 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1830                                 ingroupch.append(chunk(GROUP, ch.where, [\
1831                                           chunk(PLAIN, ch.where, \
1832                                           '(standard)')]))
1833
1834                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1835                                 length, i = length+1, i+1
1836
1837
1838                         elif s(buf, ch.data) == 'stindex':
1839                                 # XXX must actually go to newindex st
1840                                 wh = ch.where
1841                                 ch.chtype = chunk_type(CSLINE)
1842                                 ch.data = 'cindex'
1843                                 length, newi = getnextarg(length, buf, pp, i)
1844                                 ingroupch = [chunk(CSNAME, wh, 'code'), \
1845                                           chunk(GROUP, wh, pp[i:newi])]
1846
1847                                 del pp[i:newi]
1848                                 length = length - (newi-i)
1849
1850                                 t = ingroupch[:]
1851                                 t.append(chunk(PLAIN, wh, ' statement'))
1852
1853                                 pp.insert(i, chunk(GROUP, wh, t))
1854                                 i, length = i+1, length+1
1855
1856                                 pp.insert(i, chunk(CSLINE, wh, 'cindex'))
1857                                 i, length = i+1, length+1
1858
1859                                 t = ingroupch[:]
1860                                 t.insert(0, chunk(PLAIN, wh, 'statement, '))
1861
1862                                 pp.insert(i, chunk(GROUP, wh, t))
1863                                 i, length = i+1, length+1
1864
1865
1866                         elif s(buf, ch.data) == 'indexii':
1867                                 #\indexii{A}{B} --->
1868                                 # @cindex A B
1869                                 # @cindex B, A
1870                                 length, newi = getnextarg(length, buf, pp, i)
1871                                 cp11 = pp[i:newi]
1872                                 cp21 = crcopy(pp[i:newi])
1873                                 del pp[i:newi]
1874                                 length = length - (newi-i)
1875                                 length, newi = getnextarg(length, buf, pp, i)
1876                                 cp12 = pp[i:newi]
1877                                 cp22 = crcopy(pp[i:newi])
1878                                 del pp[i:newi]
1879                                 length = length - (newi-i)
1880
1881                                 ch.chtype = chunk_type(CSLINE)
1882                                 ch.data = 'cindex'
1883                                 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1884                                           chunk(PLAIN, ch.where, ' ')] + cp12))
1885                                 i, length = i+1, length+1
1886                                 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1887                                           chunk(GROUP, ch.where, cp22 + [\
1888                                           chunk(PLAIN, ch.where, ', ')]+ cp21)]
1889                                 i, length = i+2, length+2
1890
1891                         elif s(buf, ch.data) == 'indexiii':
1892                                 length, newi = getnextarg(length, buf, pp, i)
1893                                 cp11 = pp[i:newi]
1894                                 cp21 = crcopy(pp[i:newi])
1895                                 cp31 = crcopy(pp[i:newi])
1896                                 del pp[i:newi]
1897                                 length = length - (newi-i)
1898                                 length, newi = getnextarg(length, buf, pp, i)
1899                                 cp12 = pp[i:newi]
1900                                 cp22 = crcopy(pp[i:newi])
1901                                 cp32 = crcopy(pp[i:newi])
1902                                 del pp[i:newi]
1903                                 length = length - (newi-i)
1904                                 length, newi = getnextarg(length, buf, pp, i)
1905                                 cp13 = pp[i:newi]
1906                                 cp23 = crcopy(pp[i:newi])
1907                                 cp33 = crcopy(pp[i:newi])
1908                                 del pp[i:newi]
1909                                 length = length - (newi-i)
1910
1911                                 ch.chtype = chunk_type(CSLINE)
1912                                 ch.data = 'cindex'
1913                                 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1914                                           chunk(PLAIN, ch.where, ' ')] + cp12 \
1915                                           + [chunk(PLAIN, ch.where, ' ')] \
1916                                           + cp13))
1917                                 i, length = i+1, length+1
1918                                 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1919                                           chunk(GROUP, ch.where, cp22 + [\
1920                                           chunk(PLAIN, ch.where, ' ')]+ cp23\
1921                                           + [chunk(PLAIN, ch.where, ', ')] +\
1922                                           cp21)]
1923                                 i, length = i+2, length+2
1924                                 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1925                                           chunk(GROUP, ch.where, cp33 + [\
1926                                           chunk(PLAIN, ch.where, ', ')]+ cp31\
1927                                           + [chunk(PLAIN, ch.where, ' ')] +\
1928                                           cp32)]
1929                                 i, length = i+2, length+2
1930
1931
1932                         elif s(buf, ch.data) == 'indexiv':
1933                                 length, newi = getnextarg(length, buf, pp, i)
1934                                 cp11 = pp[i:newi]
1935                                 cp21 = crcopy(pp[i:newi])
1936                                 cp31 = crcopy(pp[i:newi])
1937                                 cp41 = crcopy(pp[i:newi])
1938                                 del pp[i:newi]
1939                                 length = length - (newi-i)
1940                                 length, newi = getnextarg(length, buf, pp, i)
1941                                 cp12 = pp[i:newi]
1942                                 cp22 = crcopy(pp[i:newi])
1943                                 cp32 = crcopy(pp[i:newi])
1944                                 cp42 = crcopy(pp[i:newi])
1945                                 del pp[i:newi]
1946                                 length = length - (newi-i)
1947                                 length, newi = getnextarg(length, buf, pp, i)
1948                                 cp13 = pp[i:newi]
1949                                 cp23 = crcopy(pp[i:newi])
1950                                 cp33 = crcopy(pp[i:newi])
1951                                 cp43 = crcopy(pp[i:newi])
1952                                 del pp[i:newi]
1953                                 length = length - (newi-i)
1954                                 length, newi = getnextarg(length, buf, pp, i)
1955                                 cp14 = pp[i:newi]
1956                                 cp24 = crcopy(pp[i:newi])
1957                                 cp34 = crcopy(pp[i:newi])
1958                                 cp44 = crcopy(pp[i:newi])
1959                                 del pp[i:newi]
1960                                 length = length - (newi-i)
1961
1962                                 ch.chtype = chunk_type(CSLINE)
1963                                 ch.data = 'cindex'
1964                                 ingroupch = cp11 + \
1965                                           spacech + cp12 + \
1966                                           spacech + cp13 + \
1967                                           spacech + cp14
1968                                 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1969                                 i, length = i+1, length+1
1970                                 ingroupch = cp22 + \
1971                                           spacech + cp23 + \
1972                                           spacech + cp24 + \
1973                                           commach + cp21
1974                                 pp[i:i] = cindexch + [\
1975                                           chunk(GROUP, ch.where, ingroupch)]
1976                                 i, length = i+2, length+2
1977                                 ingroupch = cp33 + \
1978                                           spacech + cp34 + \
1979                                           commach + cp31 + \
1980                                           spacech + cp32
1981                                 pp[i:i] = cindexch + [\
1982                                           chunk(GROUP, ch.where, ingroupch)]
1983                                 i, length = i+2, length+2
1984                                 ingroupch = cp44 + \
1985                                           commach + cp41 + \
1986                                           spacech + cp42 + \
1987                                           spacech + cp43
1988                                 pp[i:i] = cindexch + [\
1989                                           chunk(GROUP, ch.where, ingroupch)]
1990                                 i, length = i+2, length+2
1991
1992
1993
1994                         else:
1995                                 print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
1996
1997
1998
# Patterns used by dumpit(), compiled once when the module is loaded.
#
# re_newline finds the newlines inside the argument of a CSLINE chunk.
re_newline = regex.compile('\n')
# re_atsign finds the characters in plain text that dumpit() prefixes
# with an '@' on output.
re_atsign = regex.compile('[@{}]')
2001
2002 def dumpit(buf, wm, pp):
2003
2004         global out
2005
2006         i, length = 0, len(pp)
2007
2008         addspace = 0
2009
2010         while 1:
2011                 if len(pp) != length:
2012                         raise 'FATAL', 'inconsistent length'
2013                 if i == length:
2014                         break
2015                 ch = pp[i]
2016                 i = i + 1
2017
2018                 if addspace:
2019                         dospace = 1
2020                         addspace = 0
2021                 else:
2022                         dospace = 0
2023
2024                 if ch.chtype == chunk_type(CSNAME):
2025                         wm('@' + s(buf, ch.data))
2026                         if s(buf, ch.data) == 'node' and \
2027                                   pp[i].chtype == chunk_type(PLAIN) and \
2028                                   s(buf, pp[i].data) in out.doublenodes:
2029                                 ##XXX doesnt work yet??
2030                                 wm(' ZZZ-' + zfill(`i`, 4))
2031                         if s(buf, ch.data)[0] in string.letters:
2032                                 addspace = 1
2033                 elif ch.chtype == chunk_type(PLAIN):
2034                         if dospace and s(buf, ch.data) not in (' ', '\t'):
2035                                 wm(' ')
2036                         text = s(buf, ch.data)
2037                         while 1:
2038                                 pos = re_atsign.search(text)
2039                                 if pos < 0:
2040                                         break
2041                                 wm(text[:pos] + '@' + text[pos])
2042                                 text = text[pos+1:]
2043                         wm(text)
2044                 elif ch.chtype == chunk_type(GROUP):
2045                         wm('{')
2046                         dumpit(buf, wm, ch.data)
2047                         wm('}')
2048                 elif ch.chtype == chunk_type(DENDLINE):
2049                         wm('\n\n')
2050                         while i != length and pp[i].chtype in \
2051                                   (chunk_type(DENDLINE), chunk_type(ENDLINE)):
2052                                 i = i + 1
2053                 elif ch.chtype == chunk_type(OTHER):
2054                         wm(s(buf, ch.data))
2055                 elif ch.chtype == chunk_type(ACTIVE):
2056                         wm(s(buf, ch.data))
2057                 elif ch.chtype == chunk_type(ENDLINE):
2058                         wm('\n')
2059                 elif ch.chtype == chunk_type(CSLINE):
2060                         if i >= 2 and pp[i-2].chtype not in \
2061                                   (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2062                                   and (pp[i-2].chtype != chunk_type(PLAIN) \
2063                                   or s(buf, pp[i-2].data)[-1] != '\n'):
2064
2065                                 wm('\n')
2066                         wm('@' + s(buf, ch.data))
2067                         if i == length:
2068                                 raise error, 'CSLINE expected another chunk'
2069                         if pp[i].chtype != chunk_type(GROUP):
2070                                 raise error, 'CSLINE expected GROUP'
2071                         if type(pp[i].data) != type([]):
2072                                 raise error, 'GROUP chould contain []-data'
2073
2074                         wobj = Wobj().init()
2075                         dumpit(buf, wobj.write, pp[i].data)
2076                         i = i + 1
2077                         text = wobj.data
2078                         del wobj
2079                         if text:
2080                                 wm(' ')
2081                                 while 1:
2082                                         pos = re_newline.search(text)
2083                                         if pos < 0:
2084                                                 break
2085                                         print 'WARNING: found newline in csline arg'
2086                                         wm(text[:pos] + ' ')
2087                                         text = text[pos+1:]
2088                                 wm(text)
2089                         if i >= length or \
2090                                   pp[i].chtype not in (chunk_type(CSLINE), \
2091                                   chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2092                                   and (pp[i].chtype != chunk_type(PLAIN) \
2093                                   or s(buf, pp[i].data)[0] != '\n'):
2094                                 wm('\n')
2095
2096                 elif ch.chtype == chunk_type(COMMENT):
2097 ##                      print 'COMMENT: previous chunk =', pp[i-2]
2098 ##                      if pp[i-2].chtype == chunk_type(PLAIN):
2099 ##                              print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
2100                         if s(buf, ch.data) and \
2101                                   regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
2102                                 if i >= 2 and pp[i-2].chtype not in \
2103                                           (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2104                                           and not (pp[i-2].chtype == chunk_type(PLAIN) \
2105                                           and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
2106                                         print 'ADDING NEWLINE'
2107                                         wm('\n')
2108                                 wm('@c ' + s(buf, ch.data))
2109                 elif ch.chtype == chunk_type(IGNORE):
2110                         pass
2111                 else:
2112                         try:
2113                                 str = `s(buf, ch.data)`
2114                         except TypeError:
2115                                 str = `ch.data`
2116                         if len(str) > 400:
2117                                 str = str[:400] + '...'
2118                         print 'warning:', ch.chtype, 'not handled, data ' + str
2119
2120
2121
2122 def main():
2123         outfile = None
2124         headerfile = 'texipre.dat'
2125         trailerfile = 'texipost.dat'
2126
2127         try:
2128                 opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
2129         except getopt.error:
2130                 args = []
2131
2132         if not args:
2133                 print 'usage: partparse [-o outfile] [-h headerfile]',
2134                 print '[-t trailerfile] file ...'
2135                 sys.exit(2)
2136
2137         for opt, arg in opts:
2138                 if opt == '-o': outfile = arg
2139                 if opt == '-h': headerfile = arg
2140                 if opt == '-t': trailerfile = arg
2141
2142         if not outfile:
2143                 root, ext = os.path.splitext(args[0])
2144                 outfile = root + '.texi'
2145
2146         if outfile in args:
2147                 print 'will not overwrite input file', outfile
2148                 sys.exit(2)
2149
2150         outf = open(outfile, 'w')
2151         outf.write(open(headerfile, 'r').read())
2152
2153         for file in args:
2154                 if len(args) > 1: print '='*20, file, '='*20
2155                 buf = open(file, 'r').read()
2156                 w, pp = parseit(buf)
2157                 startchange()
2158                 changeit(buf, pp)
2159                 dumpit(buf, outf.write, pp)
2160
2161         outf.write(open(trailerfile, 'r').read())
2162
2163         outf.close()
2164
# Run the converter only when this file is executed as a script, so that
# importing the module does not start a conversion.
if __name__ == '__main__':
        main()