tools/yacc.py

   1 # -----------------------------------------------------------------------------
   2 # ply: yacc.py
   3 #
   4 # Copyright (C) 2001-2018
   5 # David M. Beazley (Dabeaz LLC)
   6 # All rights reserved.
   7 #
   8 # SPDX-License-Identifier: BSD-3-Clause
   9 #
  10 # :::::::: WARNING :::::::
  11 #
  12 # Construction of LR parsing tables is fairly complicated and expensive.
  13 # To make this module run fast, a *LOT* of work has been put into
  14 # optimization---often at the expensive of readability and what might
  15 # consider to be good Python "coding style."   Modify the code at your
  16 # own risk!
  17 # ----------------------------------------------------------------------------
  18
  19 import re
  20 import types
  21 import sys
  22 import os.path
  23 import inspect
  24 import warnings
  25
  26 __version__    = '3.11'
  27 __tabversion__ = '3.10'
  28
  29 #-----------------------------------------------------------------------------
  30 #                     === User configurable parameters ===
  31 #
  32 # Change these to modify the default behavior of yacc (if you wish)
  33 #-----------------------------------------------------------------------------
  34
  35 yaccdebug   = True             # Debugging mode.  If set, yacc generates a
  36                                # a 'parser.out' file in the current directory
  37
  38 debug_file  = 'parser.out'     # Default name of the debugging file
  39 tab_module  = 'parsetab'       # Default name of the table module
  40 default_lr  = 'LALR'           # Default LR table generation method
  41
  42 error_count = 3                # Number of symbols that must be shifted to leave recovery mode
  43
  44 yaccdevel   = False            # Set to True if developing yacc.  This turns off optimized
  45                                # implementations of certain functions.
  46
  47 resultlimit = 40               # Size limit of results when running in debug mode.
  48
  49 pickle_protocol = 0            # Protocol to use when writing pickle files
  50
  51 # String type-checking compatibility
  52 if sys.version_info[0] < 3:
  53     string_types = basestring
  54 else:
  55     string_types = str
  56
  57 MAXINT = sys.maxsize
  58
  59 # This object is a stand-in for a logging object created by the
  60 # logging module.   PLY will use this by default to create things
  61 # such as the parser.out file.  If a user wants more detailed
  62 # information, they can create their own logging object and pass
  63 # it into PLY.
  64
  65 class PlyLogger(object):
  66     def __init__(self, f):
  67         self.f = f
  68
  69     def debug(self, msg, *args, **kwargs):
  70         self.f.write((msg % args) + '\n')
  71
  72     info = debug
  73
  74     def warning(self, msg, *args, **kwargs):
  75         self.f.write('WARNING: ' + (msg % args) + '\n')
  76
  77     def error(self, msg, *args, **kwargs):
  78         self.f.write('ERROR: ' + (msg % args) + '\n')
  79
  80     critical = debug
  81
  82 # Null logger is used when no output is generated. Does nothing.
  83 class NullLogger(object):
  84     def __getattribute__(self, name):
  85         return self
  86
  87     def __call__(self, *args, **kwargs):
  88         return self
  89
  90 # Exception raised for yacc-related errors
  91 class YaccError(Exception):
  92     pass
  93
  94 # Format the result message that the parser produces when running in debug mode.
  95 def format_result(r):
  96     repr_str = repr(r)
  97     if '\n' in repr_str:
  98         repr_str = repr(repr_str)
  99     if len(repr_str) > resultlimit:
 100         repr_str = repr_str[:resultlimit] + ' ...'
 101     result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
 102     return result
 103
 104 # Format stack entries when the parser is running in debug mode
 105 def format_stack_entry(r):
 106     repr_str = repr(r)
 107     if '\n' in repr_str:
 108         repr_str = repr(repr_str)
 109     if len(repr_str) < 16:
 110         return repr_str
 111     else:
 112         return '<%s @ 0x%x>' % (type(r).__name__, id(r))
 113
 114 # Panic mode error recovery support.   This feature is being reworked--much of the
 115 # code here is to offer a deprecation/backwards compatible transition
 116
 117 _errok = None
 118 _token = None
 119 _restart = None
 120 _warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
 121 Instead, invoke the methods on the associated parser instance:
 122
 123     def p_error(p):
 124         ...
 125         # Use parser.errok(), parser.token(), parser.restart()
 126         ...
 127
 128     parser = yacc.yacc()
 129 '''
 130
 131 def errok():
 132     warnings.warn(_warnmsg)
 133     return _errok()
 134
 135 def restart():
 136     warnings.warn(_warnmsg)
 137     return _restart()
 138
 139 def token():
 140     warnings.warn(_warnmsg)
 141     return _token()
 142
 143 # Utility function to call the p_error() function with some deprecation hacks
 144 def call_errorfunc(errorfunc, token, parser):
 145     global _errok, _token, _restart
 146     _errok = parser.errok
 147     _token = parser.token
 148     _restart = parser.restart
 149     r = errorfunc(token)
 150     try:
 151         del _errok, _token, _restart
 152     except NameError:
 153         pass
 154     return r
 155
 156 #-----------------------------------------------------------------------------
 157 #                        ===  LR Parsing Engine ===
 158 #
 159 # The following classes are used for the LR parser itself.  These are not
 160 # used during table construction and are independent of the actual LR
 161 # table generation algorithm
 162 #-----------------------------------------------------------------------------
 163
 164 # This class is used to hold non-terminal grammar symbols during parsing.
 165 # It normally has the following attributes set:
 166 #        .type       = Grammar symbol type
 167 #        .value      = Symbol value
 168 #        .lineno     = Starting line number
 169 #        .endlineno  = Ending line number (optional, set automatically)
 170 #        .lexpos     = Starting lex position
 171 #        .endlexpos  = Ending lex position (optional, set automatically)
 172
 173 class YaccSymbol:
 174     def __str__(self):
 175         return self.type
 176
 177     def __repr__(self):
 178         return str(self)
 179
 180 # This class is a wrapper around the objects actually passed to each
 181 # grammar rule.   Index lookup and assignment actually assign the
 182 # .value attribute of the underlying YaccSymbol object.
 183 # The lineno() method returns the line number of a given
 184 # item (or 0 if not defined).   The linespan() method returns
 185 # a tuple of (startline,endline) representing the range of lines
 186 # for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
 187 # representing the range of positional information for a symbol.
 188
 189 class YaccProduction:
 190     def __init__(self, s, stack=None):
 191         self.slice = s
 192         self.stack = stack
 193         self.lexer = None
 194         self.parser = None
 195
 196     def __getitem__(self, n):
 197         if isinstance(n, slice):
 198             return [s.value for s in self.slice[n]]
 199         elif n >= 0:
 200             return self.slice[n].value
 201         else:
 202             return self.stack[n].value
 203
 204     def __setitem__(self, n, v):
 205         self.slice[n].value = v
 206
 207     def __getslice__(self, i, j):
 208         return [s.value for s in self.slice[i:j]]
 209
 210     def __len__(self):
 211         return len(self.slice)
 212
 213     def lineno(self, n):
 214         return getattr(self.slice[n], 'lineno', 0)
 215
 216     def set_lineno(self, n, lineno):
 217         self.slice[n].lineno = lineno
 218
 219     def linespan(self, n):
 220         startline = getattr(self.slice[n], 'lineno', 0)
 221         endline = getattr(self.slice[n], 'endlineno', startline)
 222         return startline, endline
 223
 224     def lexpos(self, n):
 225         return getattr(self.slice[n], 'lexpos', 0)
 226
 227     def set_lexpos(self, n, lexpos):
 228         self.slice[n].lexpos = lexpos
 229
 230     def lexspan(self, n):
 231         startpos = getattr(self.slice[n], 'lexpos', 0)
 232         endpos = getattr(self.slice[n], 'endlexpos', startpos)
 233         return startpos, endpos
 234
 235     def error(self):
 236         raise SyntaxError
 237
 238 # -----------------------------------------------------------------------------
 239 #                               == LRParser ==
 240 #
 241 # The LR Parsing engine.
 242 # -----------------------------------------------------------------------------
 243
 244 class LRParser:
 245     def __init__(self, lrtab, errorf):
 246         self.productions = lrtab.lr_productions
 247         self.action = lrtab.lr_action
 248         self.goto = lrtab.lr_goto
 249         self.errorfunc = errorf
 250         self.set_defaulted_states()
 251         self.errorok = True
 252
 253     def errok(self):
 254         self.errorok = True
 255
 256     def restart(self):
 257         del self.statestack[:]
 258         del self.symstack[:]
 259         sym = YaccSymbol()
 260         sym.type = '$end'
 261         self.symstack.append(sym)
 262         self.statestack.append(0)
 263
 264     # Defaulted state support.
 265     # This method identifies parser states where there is only one possible reduction action.
 266     # For such states, the parser can make a choose to make a rule reduction without consuming
 267     # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
 268     # certain kinds of advanced parsing situations where the lexer and parser interact with
 269     # each other or change states (i.e., manipulation of scope, lexer states, etc.).
 270     #
 271     # See:  http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
 272     def set_defaulted_states(self):
 273         self.defaulted_states = {}
 274         for state, actions in self.action.items():
 275             rules = list(actions.values())
 276             if len(rules) == 1 and rules[0] < 0:
 277                 self.defaulted_states[state] = rules[0]
 278
 279     def disable_defaulted_states(self):
 280         self.defaulted_states = {}
 281
 282     def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
 283         if debug or yaccdevel:
 284             if isinstance(debug, int):
 285                 debug = PlyLogger(sys.stderr)
 286             return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
 287         elif tracking:
 288             return self.parseopt(input, lexer, debug, tracking, tokenfunc)
 289         else:
 290             return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
 291
 292
 293     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 294     # parsedebug().
 295     #
 296     # This is the debugging enabled version of parse().  All changes made to the
 297     # parsing engine should be made here.   Optimized versions of this function
 298     # are automatically created by the ply/ygen.py script.  This script cuts out
 299     # sections enclosed in markers such as this:
 300     #
 301     #      #--! DEBUG
 302     #      statements
 303     #      #--! DEBUG
 304     #
 305     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 306
 307     def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
 308         #--! parsedebug-start
 309         lookahead = None                         # Current lookahead symbol
 310         lookaheadstack = []                      # Stack of lookahead symbols
 311         actions = self.action                    # Local reference to action table (to avoid lookup on self.)
 312         goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
 313         prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
 314         defaulted_states = self.defaulted_states # Local reference to defaulted states
 315         pslice  = YaccProduction(None)           # Production object passed to grammar rules
 316         errorcount = 0                           # Used during error recovery
 317
 318         #--! DEBUG
 319         debug.info('PLY: PARSE DEBUG START')
 320         #--! DEBUG
 321
 322         # If no lexer was given, we will try to use the lex module
 323         if not lexer:
 324             from . import lex
 325             lexer = lex.lexer
 326
 327         # Set up the lexer and parser objects on pslice
 328         pslice.lexer = lexer
 329         pslice.parser = self
 330
 331         # If input was supplied, pass to lexer
 332         if input is not None:
 333             lexer.input(input)
 334
 335         if tokenfunc is None:
 336             # Tokenize function
 337             get_token = lexer.token
 338         else:
 339             get_token = tokenfunc
 340
 341         # Set the parser() token method (sometimes used in error recovery)
 342         self.token = get_token
 343
 344         # Set up the state and symbol stacks
 345
 346         statestack = []                # Stack of parsing states
 347         self.statestack = statestack
 348         symstack   = []                # Stack of grammar symbols
 349         self.symstack = symstack
 350
 351         pslice.stack = symstack         # Put in the production
 352         errtoken   = None               # Err token
 353
 354         # The start state is assumed to be (0,$end)
 355
 356         statestack.append(0)
 357         sym = YaccSymbol()
 358         sym.type = '$end'
 359         symstack.append(sym)
 360         state = 0
 361         while True:
 362             # Get the next symbol on the input.  If a lookahead symbol
 363             # is already set, we just use that. Otherwise, we'll pull
 364             # the next token off of the lookaheadstack or from the lexer
 365
 366             #--! DEBUG
 367             debug.debug('')
 368             debug.debug('State  : %s', state)
 369             #--! DEBUG
 370
 371             if state not in defaulted_states:
 372                 if not lookahead:
 373                     if not lookaheadstack:
 374                         lookahead = get_token()     # Get the next token
 375                     else:
 376                         lookahead = lookaheadstack.pop()
 377                     if not lookahead:
 378                         lookahead = YaccSymbol()
 379                         lookahead.type = '$end'
 380
 381                 # Check the action table
 382                 ltype = lookahead.type
 383                 t = actions[state].get(ltype)
 384             else:
 385                 t = defaulted_states[state]
 386                 #--! DEBUG
 387                 debug.debug('Defaulted state %s: Reduce using %d', state, -t)
 388                 #--! DEBUG
 389
 390             #--! DEBUG
 391             debug.debug('Stack  : %s',
 392                         ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
 393             #--! DEBUG
 394
 395             if t is not None:
 396                 if t > 0:
 397                     # shift a symbol on the stack
 398                     statestack.append(t)
 399                     state = t
 400
 401                     #--! DEBUG
 402                     debug.debug('Action : Shift and goto state %s', t)
 403                     #--! DEBUG
 404
 405                     symstack.append(lookahead)
 406                     lookahead = None
 407
 408                     # Decrease error count on successful shift
 409                     if errorcount:
 410                         errorcount -= 1
 411                     continue
 412
 413                 if t < 0:
 414                     # reduce a symbol on the stack, emit a production
 415                     p = prod[-t]
 416                     pname = p.name
 417                     plen  = p.len
 418
 419                     # Get production function
 420                     sym = YaccSymbol()
 421                     sym.type = pname       # Production name
 422                     sym.value = None
 423
 424                     #--! DEBUG
 425                     if plen:
 426                         debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
 427                                    '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
 428                                    goto[statestack[-1-plen]][pname])
 429                     else:
 430                         debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
 431                                    goto[statestack[-1]][pname])
 432
 433                     #--! DEBUG
 434
 435                     if plen:
 436                         targ = symstack[-plen-1:]
 437                         targ[0] = sym
 438
 439                         #--! TRACKING
 440                         if tracking:
 441                             t1 = targ[1]
 442                             sym.lineno = t1.lineno
 443                             sym.lexpos = t1.lexpos
 444                             t1 = targ[-1]
 445                             sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
 446                             sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
 447                         #--! TRACKING
 448
 449                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 450                         # The code enclosed in this section is duplicated
 451                         # below as a performance optimization.  Make sure
 452                         # changes get made in both locations.
 453
 454                         pslice.slice = targ
 455
 456                         try:
 457                             # Call the grammar rule with our special slice object
 458                             del symstack[-plen:]
 459                             self.state = state
 460                             p.callable(pslice)
 461                             del statestack[-plen:]
 462                             #--! DEBUG
 463                             debug.info('Result : %s', format_result(pslice[0]))
 464                             #--! DEBUG
 465                             symstack.append(sym)
 466                             state = goto[statestack[-1]][pname]
 467                             statestack.append(state)
 468                         except SyntaxError:
 469                             # If an error was set. Enter error recovery state
 470                             lookaheadstack.append(lookahead)    # Save the current lookahead token
 471                             symstack.extend(targ[1:-1])         # Put the production slice back on the stack
 472                             statestack.pop()                    # Pop back one state (before the reduce)
 473                             state = statestack[-1]
 474                             sym.type = 'error'
 475                             sym.value = 'error'
 476                             lookahead = sym
 477                             errorcount = error_count
 478                             self.errorok = False
 479
 480                         continue
 481                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 482
 483                     else:
 484
 485                         #--! TRACKING
 486                         if tracking:
 487                             sym.lineno = lexer.lineno
 488                             sym.lexpos = lexer.lexpos
 489                         #--! TRACKING
 490
 491                         targ = [sym]
 492
 493                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 494                         # The code enclosed in this section is duplicated
 495                         # above as a performance optimization.  Make sure
 496                         # changes get made in both locations.
 497
 498                         pslice.slice = targ
 499
 500                         try:
 501                             # Call the grammar rule with our special slice object
 502                             self.state = state
 503                             p.callable(pslice)
 504                             #--! DEBUG
 505                             debug.info('Result : %s', format_result(pslice[0]))
 506                             #--! DEBUG
 507                             symstack.append(sym)
 508                             state = goto[statestack[-1]][pname]
 509                             statestack.append(state)
 510                         except SyntaxError:
 511                             # If an error was set. Enter error recovery state
 512                             lookaheadstack.append(lookahead)    # Save the current lookahead token
 513                             statestack.pop()                    # Pop back one state (before the reduce)
 514                             state = statestack[-1]
 515                             sym.type = 'error'
 516                             sym.value = 'error'
 517                             lookahead = sym
 518                             errorcount = error_count
 519                             self.errorok = False
 520
 521                         continue
 522                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 523
 524                 if t == 0:
 525                     n = symstack[-1]
 526                     result = getattr(n, 'value', None)
 527                     #--! DEBUG
 528                     debug.info('Done   : Returning %s', format_result(result))
 529                     debug.info('PLY: PARSE DEBUG END')
 530                     #--! DEBUG
 531                     return result
 532
 533             if t is None:
 534
 535                 #--! DEBUG
 536                 debug.error('Error  : %s',
 537                             ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
 538                 #--! DEBUG
 539
 540                 # We have some kind of parsing error here.  To handle
 541                 # this, we are going to push the current token onto
 542                 # the tokenstack and replace it with an 'error' token.
 543                 # If there are any synchronization rules, they may
 544                 # catch it.
 545                 #
 546                 # In addition to pushing the error token, we call call
 547                 # the user defined p_error() function if this is the
 548                 # first syntax error.  This function is only called if
 549                 # errorcount == 0.
 550                 if errorcount == 0 or self.errorok:
 551                     errorcount = error_count
 552                     self.errorok = False
 553                     errtoken = lookahead
 554                     if errtoken.type == '$end':
 555                         errtoken = None               # End of file!
 556                     if self.errorfunc:
 557                         if errtoken and not hasattr(errtoken, 'lexer'):
 558                             errtoken.lexer = lexer
 559                         self.state = state
 560                         tok = call_errorfunc(self.errorfunc, errtoken, self)
 561                         if self.errorok:
 562                             # User must have done some kind of panic
 563                             # mode recovery on their own.  The
 564                             # returned token is the next lookahead
 565                             lookahead = tok
 566                             errtoken = None
 567                             continue
 568                     else:
 569                         if errtoken:
 570                             if hasattr(errtoken, 'lineno'):
 571                                 lineno = lookahead.lineno
 572                             else:
 573                                 lineno = 0
 574                             if lineno:
 575                                 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
 576                             else:
 577                                 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
 578                         else:
 579                             sys.stderr.write('yacc: Parse error in input. EOF\n')
 580                             return
 581
 582                 else:
 583                     errorcount = error_count
 584
 585                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
 586                 # entire parse has been rolled back and we're completely hosed.   The token is
 587                 # discarded and we just keep going.
 588
 589                 if len(statestack) <= 1 and lookahead.type != '$end':
 590                     lookahead = None
 591                     errtoken = None
 592                     state = 0
 593                     # Nuke the pushback stack
 594                     del lookaheadstack[:]
 595                     continue
 596
 597                 # case 2: the statestack has a couple of entries on it, but we're
 598                 # at the end of the file. nuke the top entry and generate an error token
 599
 600                 # Start nuking entries on the stack
 601                 if lookahead.type == '$end':
 602                     # Whoa. We're really hosed here. Bail out
 603                     return
 604
 605                 if lookahead.type != 'error':
 606                     sym = symstack[-1]
 607                     if sym.type == 'error':
 608                         # Hmmm. Error is on top of stack, we'll just nuke input
 609                         # symbol and continue
 610                         #--! TRACKING
 611                         if tracking:
 612                             sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
 613                             sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
 614                         #--! TRACKING
 615                         lookahead = None
 616                         continue
 617
 618                     # Create the error symbol for the first time and make it the new lookahead symbol
 619                     t = YaccSymbol()
 620                     t.type = 'error'
 621
 622                     if hasattr(lookahead, 'lineno'):
 623                         t.lineno = t.endlineno = lookahead.lineno
 624                     if hasattr(lookahead, 'lexpos'):
 625                         t.lexpos = t.endlexpos = lookahead.lexpos
 626                     t.value = lookahead
 627                     lookaheadstack.append(lookahead)
 628                     lookahead = t
 629                 else:
 630                     sym = symstack.pop()
 631                     #--! TRACKING
 632                     if tracking:
 633                         lookahead.lineno = sym.lineno
 634                         lookahead.lexpos = sym.lexpos
 635                     #--! TRACKING
 636                     statestack.pop()
 637                     state = statestack[-1]
 638
 639                 continue
 640
 641             # Call an error function here
 642             raise RuntimeError('yacc: internal parser error!!!\n')
 643
 644         #--! parsedebug-end
 645
 646     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 647     # parseopt().
 648     #
 649     # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY!
 650     # This code is automatically generated by the ply/ygen.py script. Make
 651     # changes to the parsedebug() method instead.
 652     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 653
 654     def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
 655         #--! parseopt-start
 656         lookahead = None                         # Current lookahead symbol
 657         lookaheadstack = []                      # Stack of lookahead symbols
 658         actions = self.action                    # Local reference to action table (to avoid lookup on self.)
 659         goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
 660         prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
 661         defaulted_states = self.defaulted_states # Local reference to defaulted states
 662         pslice  = YaccProduction(None)           # Production object passed to grammar rules
 663         errorcount = 0                           # Used during error recovery
 664
 665
 666         # If no lexer was given, we will try to use the lex module
 667         if not lexer:
 668             from . import lex
 669             lexer = lex.lexer
 670
 671         # Set up the lexer and parser objects on pslice
 672         pslice.lexer = lexer
 673         pslice.parser = self
 674
 675         # If input was supplied, pass to lexer
 676         if input is not None:
 677             lexer.input(input)
 678
 679         if tokenfunc is None:
 680             # Tokenize function
 681             get_token = lexer.token
 682         else:
 683             get_token = tokenfunc
 684
 685         # Set the parser() token method (sometimes used in error recovery)
 686         self.token = get_token
 687
 688         # Set up the state and symbol stacks
 689
 690         statestack = []                # Stack of parsing states
 691         self.statestack = statestack
 692         symstack   = []                # Stack of grammar symbols
 693         self.symstack = symstack
 694
 695         pslice.stack = symstack         # Put in the production
 696         errtoken   = None               # Err token
 697
 698         # The start state is assumed to be (0,$end)
 699
 700         statestack.append(0)
 701         sym = YaccSymbol()
 702         sym.type = '$end'
 703         symstack.append(sym)
 704         state = 0
 705         while True:
 706             # Get the next symbol on the input.  If a lookahead symbol
 707             # is already set, we just use that. Otherwise, we'll pull
 708             # the next token off of the lookaheadstack or from the lexer
 709
 710
 711             if state not in defaulted_states:
 712                 if not lookahead:
 713                     if not lookaheadstack:
 714                         lookahead = get_token()     # Get the next token
 715                     else:
 716                         lookahead = lookaheadstack.pop()
 717                     if not lookahead:
 718                         lookahead = YaccSymbol()
 719                         lookahead.type = '$end'
 720
 721                 # Check the action table
 722                 ltype = lookahead.type
 723                 t = actions[state].get(ltype)
 724             else:
 725                 t = defaulted_states[state]
 726
 727
 728             if t is not None:
 729                 if t > 0:
 730                     # shift a symbol on the stack
 731                     statestack.append(t)
 732                     state = t
 733
 734
 735                     symstack.append(lookahead)
 736                     lookahead = None
 737
 738                     # Decrease error count on successful shift
 739                     if errorcount:
 740                         errorcount -= 1
 741                     continue
 742
 743                 if t < 0:
 744                     # reduce a symbol on the stack, emit a production
 745                     p = prod[-t]
 746                     pname = p.name
 747                     plen  = p.len
 748
 749                     # Get production function
 750                     sym = YaccSymbol()
 751                     sym.type = pname       # Production name
 752                     sym.value = None
 753
 754
 755                     if plen:
 756                         targ = symstack[-plen-1:]
 757                         targ[0] = sym
 758
 759                         #--! TRACKING
 760                         if tracking:
 761                             t1 = targ[1]
 762                             sym.lineno = t1.lineno
 763                             sym.lexpos = t1.lexpos
 764                             t1 = targ[-1]
 765                             sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
 766                             sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
 767                         #--! TRACKING
 768
 769                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 770                         # The code enclosed in this section is duplicated
 771                         # below as a performance optimization.  Make sure
 772                         # changes get made in both locations.
 773
 774                         pslice.slice = targ
 775
 776                         try:
 777                             # Call the grammar rule with our special slice object
 778                             del symstack[-plen:]
 779                             self.state = state
 780                             p.callable(pslice)
 781                             del statestack[-plen:]
 782                             symstack.append(sym)
 783                             state = goto[statestack[-1]][pname]
 784                             statestack.append(state)
 785                         except SyntaxError:
 786                             # If an error was set. Enter error recovery state
 787                             lookaheadstack.append(lookahead)    # Save the current lookahead token
 788                             symstack.extend(targ[1:-1])         # Put the production slice back on the stack
 789                             statestack.pop()                    # Pop back one state (before the reduce)
 790                             state = statestack[-1]
 791                             sym.type = 'error'
 792                             sym.value = 'error'
 793                             lookahead = sym
 794                             errorcount = error_count
 795                             self.errorok = False
 796
 797                         continue
 798                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 799
 800                     else:
 801
 802                         #--! TRACKING
 803                         if tracking:
 804                             sym.lineno = lexer.lineno
 805                             sym.lexpos = lexer.lexpos
 806                         #--! TRACKING
 807
 808                         targ = [sym]
 809
 810                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 811                         # The code enclosed in this section is duplicated
 812                         # above as a performance optimization.  Make sure
 813                         # changes get made in both locations.
 814
 815                         pslice.slice = targ
 816
 817                         try:
 818                             # Call the grammar rule with our special slice object
 819                             self.state = state
 820                             p.callable(pslice)
 821                             symstack.append(sym)
 822                             state = goto[statestack[-1]][pname]
 823                             statestack.append(state)
 824                         except SyntaxError:
 825                             # If an error was set. Enter error recovery state
 826                             lookaheadstack.append(lookahead)    # Save the current lookahead token
 827                             statestack.pop()                    # Pop back one state (before the reduce)
 828                             state = statestack[-1]
 829                             sym.type = 'error'
 830                             sym.value = 'error'
 831                             lookahead = sym
 832                             errorcount = error_count
 833                             self.errorok = False
 834
 835                         continue
 836                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 837
 838                 if t == 0:
 839                     n = symstack[-1]
 840                     result = getattr(n, 'value', None)
 841                     return result
 842
 843             if t is None:
 844
 845
 846                 # We have some kind of parsing error here.  To handle
 847                 # this, we are going to push the current token onto
 848                 # the tokenstack and replace it with an 'error' token.
 849                 # If there are any synchronization rules, they may
 850                 # catch it.
 851                 #
 852                 # In addition to pushing the error token, we call call
 853                 # the user defined p_error() function if this is the
 854                 # first syntax error.  This function is only called if
 855                 # errorcount == 0.
 856                 if errorcount == 0 or self.errorok:
 857                     errorcount = error_count
 858                     self.errorok = False
 859                     errtoken = lookahead
 860                     if errtoken.type == '$end':
 861                         errtoken = None               # End of file!
 862                     if self.errorfunc:
 863                         if errtoken and not hasattr(errtoken, 'lexer'):
 864                             errtoken.lexer = lexer
 865                         self.state = state
 866                         tok = call_errorfunc(self.errorfunc, errtoken, self)
 867                         if self.errorok:
 868                             # User must have done some kind of panic
 869                             # mode recovery on their own.  The
 870                             # returned token is the next lookahead
 871                             lookahead = tok
 872                             errtoken = None
 873                             continue
 874                     else:
 875                         if errtoken:
 876                             if hasattr(errtoken, 'lineno'):
 877                                 lineno = lookahead.lineno
 878                             else:
 879                                 lineno = 0
 880                             if lineno:
 881                                 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
 882                             else:
 883                                 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
 884                         else:
 885                             sys.stderr.write('yacc: Parse error in input. EOF\n')
 886                             return
 887
 888                 else:
 889                     errorcount = error_count
 890
 891                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
 892                 # entire parse has been rolled back and we're completely hosed.   The token is
 893                 # discarded and we just keep going.
 894
 895                 if len(statestack) <= 1 and lookahead.type != '$end':
 896                     lookahead = None
 897                     errtoken = None
 898                     state = 0
 899                     # Nuke the pushback stack
 900                     del lookaheadstack[:]
 901                     continue
 902
 903                 # case 2: the statestack has a couple of entries on it, but we're
 904                 # at the end of the file. nuke the top entry and generate an error token
 905
 906                 # Start nuking entries on the stack
 907                 if lookahead.type == '$end':
 908                     # Whoa. We're really hosed here. Bail out
 909                     return
 910
 911                 if lookahead.type != 'error':
 912                     sym = symstack[-1]
 913                     if sym.type == 'error':
 914                         # Hmmm. Error is on top of stack, we'll just nuke input
 915                         # symbol and continue
 916                         #--! TRACKING
 917                         if tracking:
 918                             sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
 919                             sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
 920                         #--! TRACKING
 921                         lookahead = None
 922                         continue
 923
 924                     # Create the error symbol for the first time and make it the new lookahead symbol
 925                     t = YaccSymbol()
 926                     t.type = 'error'
 927
 928                     if hasattr(lookahead, 'lineno'):
 929                         t.lineno = t.endlineno = lookahead.lineno
 930                     if hasattr(lookahead, 'lexpos'):
 931                         t.lexpos = t.endlexpos = lookahead.lexpos
 932                     t.value = lookahead
 933                     lookaheadstack.append(lookahead)
 934                     lookahead = t
 935                 else:
 936                     sym = symstack.pop()
 937                     #--! TRACKING
 938                     if tracking:
 939                         lookahead.lineno = sym.lineno
 940                         lookahead.lexpos = sym.lexpos
 941                     #--! TRACKING
 942                     statestack.pop()
 943                     state = statestack[-1]
 944
 945                 continue
 946
 947             # Call an error function here
 948             raise RuntimeError('yacc: internal parser error!!!\n')
 949
 950         #--! parseopt-end
 951
 952     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 953     # parseopt_notrack().
 954     #
 955     # Optimized version of parseopt() with line number tracking removed.
 956     # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
 957     # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
 958     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 959
 960     def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
 961         #--! parseopt-notrack-start
 962         lookahead = None                         # Current lookahead symbol
 963         lookaheadstack = []                      # Stack of lookahead symbols
 964         actions = self.action                    # Local reference to action table (to avoid lookup on self.)
 965         goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
 966         prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
 967         defaulted_states = self.defaulted_states # Local reference to defaulted states
 968         pslice  = YaccProduction(None)           # Production object passed to grammar rules
 969         errorcount = 0                           # Used during error recovery
 970
 971
 972         # If no lexer was given, we will try to use the lex module
 973         if not lexer:
 974             from . import lex
 975             lexer = lex.lexer
 976
 977         # Set up the lexer and parser objects on pslice
 978         pslice.lexer = lexer
 979         pslice.parser = self
 980
 981         # If input was supplied, pass to lexer
 982         if input is not None:
 983             lexer.input(input)
 984
 985         if tokenfunc is None:
 986             # Tokenize function
 987             get_token = lexer.token
 988         else:
 989             get_token = tokenfunc
 990
 991         # Set the parser() token method (sometimes used in error recovery)
 992         self.token = get_token
 993
 994         # Set up the state and symbol stacks
 995
 996         statestack = []                # Stack of parsing states
 997         self.statestack = statestack
 998         symstack   = []                # Stack of grammar symbols
 999         self.symstack = symstack
1000
1001         pslice.stack = symstack         # Put in the production
1002         errtoken   = None               # Err token
1003
1004         # The start state is assumed to be (0,$end)
1005
1006         statestack.append(0)
1007         sym = YaccSymbol()
1008         sym.type = '$end'
1009         symstack.append(sym)
1010         state = 0
1011         while True:
1012             # Get the next symbol on the input.  If a lookahead symbol
1013             # is already set, we just use that. Otherwise, we'll pull
1014             # the next token off of the lookaheadstack or from the lexer
1015
1016
1017             if state not in defaulted_states:
1018                 if not lookahead:
1019                     if not lookaheadstack:
1020                         lookahead = get_token()     # Get the next token
1021                     else:
1022                         lookahead = lookaheadstack.pop()
1023                     if not lookahead:
1024                         lookahead = YaccSymbol()
1025                         lookahead.type = '$end'
1026
1027                 # Check the action table
1028                 ltype = lookahead.type
1029                 t = actions[state].get(ltype)
1030             else:
1031                 t = defaulted_states[state]
1032
1033
1034             if t is not None:
1035                 if t > 0:
1036                     # shift a symbol on the stack
1037                     statestack.append(t)
1038                     state = t
1039
1040
1041                     symstack.append(lookahead)
1042                     lookahead = None
1043
1044                     # Decrease error count on successful shift
1045                     if errorcount:
1046                         errorcount -= 1
1047                     continue
1048
1049                 if t < 0:
1050                     # reduce a symbol on the stack, emit a production
1051                     p = prod[-t]
1052                     pname = p.name
1053                     plen  = p.len
1054
1055                     # Get production function
1056                     sym = YaccSymbol()
1057                     sym.type = pname       # Production name
1058                     sym.value = None
1059
1060
1061                     if plen:
1062                         targ = symstack[-plen-1:]
1063                         targ[0] = sym
1064
1065
1066                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1067                         # The code enclosed in this section is duplicated
1068                         # below as a performance optimization.  Make sure
1069                         # changes get made in both locations.
1070
1071                         pslice.slice = targ
1072
1073                         try:
1074                             # Call the grammar rule with our special slice object
1075                             del symstack[-plen:]
1076                             self.state = state
1077                             p.callable(pslice)
1078                             del statestack[-plen:]
1079                             symstack.append(sym)
1080                             state = goto[statestack[-1]][pname]
1081                             statestack.append(state)
1082                         except SyntaxError:
1083                             # If an error was set. Enter error recovery state
1084                             lookaheadstack.append(lookahead)    # Save the current lookahead token
1085                             symstack.extend(targ[1:-1])         # Put the production slice back on the stack
1086                             statestack.pop()                    # Pop back one state (before the reduce)
1087                             state = statestack[-1]
1088                             sym.type = 'error'
1089                             sym.value = 'error'
1090                             lookahead = sym
1091                             errorcount = error_count
1092                             self.errorok = False
1093
1094                         continue
1095                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1096
1097                     else:
1098
1099
1100                         targ = [sym]
1101
1102                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1103                         # The code enclosed in this section is duplicated
1104                         # above as a performance optimization.  Make sure
1105                         # changes get made in both locations.
1106
1107                         pslice.slice = targ
1108
1109                         try:
1110                             # Call the grammar rule with our special slice object
1111                             self.state = state
1112                             p.callable(pslice)
1113                             symstack.append(sym)
1114                             state = goto[statestack[-1]][pname]
1115                             statestack.append(state)
1116                         except SyntaxError:
1117                             # If an error was set. Enter error recovery state
1118                             lookaheadstack.append(lookahead)    # Save the current lookahead token
1119                             statestack.pop()                    # Pop back one state (before the reduce)
1120                             state = statestack[-1]
1121                             sym.type = 'error'
1122                             sym.value = 'error'
1123                             lookahead = sym
1124                             errorcount = error_count
1125                             self.errorok = False
1126
1127                         continue
1128                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1129
1130                 if t == 0:
1131                     n = symstack[-1]
1132                     result = getattr(n, 'value', None)
1133                     return result
1134
1135             if t is None:
1136
1137
1138                 # We have some kind of parsing error here.  To handle
1139                 # this, we are going to push the current token onto
1140                 # the tokenstack and replace it with an 'error' token.
1141                 # If there are any synchronization rules, they may
1142                 # catch it.
1143                 #
1144                 # In addition to pushing the error token, we call call
1145                 # the user defined p_error() function if this is the
1146                 # first syntax error.  This function is only called if
1147                 # errorcount == 0.
1148                 if errorcount == 0 or self.errorok:
1149                     errorcount = error_count
1150                     self.errorok = False
1151                     errtoken = lookahead
1152                     if errtoken.type == '$end':
1153                         errtoken = None               # End of file!
1154                     if self.errorfunc:
1155                         if errtoken and not hasattr(errtoken, 'lexer'):
1156                             errtoken.lexer = lexer
1157                         self.state = state
1158                         tok = call_errorfunc(self.errorfunc, errtoken, self)
1159                         if self.errorok:
1160                             # User must have done some kind of panic
1161                             # mode recovery on their own.  The
1162                             # returned token is the next lookahead
1163                             lookahead = tok
1164                             errtoken = None
1165                             continue
1166                     else:
1167                         if errtoken:
1168                             if hasattr(errtoken, 'lineno'):
1169                                 lineno = lookahead.lineno
1170                             else:
1171                                 lineno = 0
1172                             if lineno:
1173                                 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
1174                             else:
1175                                 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
1176                         else:
1177                             sys.stderr.write('yacc: Parse error in input. EOF\n')
1178                             return
1179
1180                 else:
1181                     errorcount = error_count
1182
1183                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
1184                 # entire parse has been rolled back and we're completely hosed.   The token is
1185                 # discarded and we just keep going.
1186
1187                 if len(statestack) <= 1 and lookahead.type != '$end':
1188                     lookahead = None
1189                     errtoken = None
1190                     state = 0
1191                     # Nuke the pushback stack
1192                     del lookaheadstack[:]
1193                     continue
1194
1195                 # case 2: the statestack has a couple of entries on it, but we're
1196                 # at the end of the file. nuke the top entry and generate an error token
1197
1198                 # Start nuking entries on the stack
1199                 if lookahead.type == '$end':
1200                     # Whoa. We're really hosed here. Bail out
1201                     return
1202
1203                 if lookahead.type != 'error':
1204                     sym = symstack[-1]
1205                     if sym.type == 'error':
1206                         # Hmmm. Error is on top of stack, we'll just nuke input
1207                         # symbol and continue
1208                         lookahead = None
1209                         continue
1210
1211                     # Create the error symbol for the first time and make it the new lookahead symbol
1212                     t = YaccSymbol()
1213                     t.type = 'error'
1214
1215                     if hasattr(lookahead, 'lineno'):
1216                         t.lineno = t.endlineno = lookahead.lineno
1217                     if hasattr(lookahead, 'lexpos'):
1218                         t.lexpos = t.endlexpos = lookahead.lexpos
1219                     t.value = lookahead
1220                     lookaheadstack.append(lookahead)
1221                     lookahead = t
1222                 else:
1223                     sym = symstack.pop()
1224                     statestack.pop()
1225                     state = statestack[-1]
1226
1227                 continue
1228
1229             # Call an error function here
1230             raise RuntimeError('yacc: internal parser error!!!\n')
1231
1232         #--! parseopt-notrack-end
1233
1234 # -----------------------------------------------------------------------------
1235 #                          === Grammar Representation ===
1236 #
1237 # The following functions, classes, and variables are used to represent and
1238 # manipulate the rules that make up a grammar.
1239 # -----------------------------------------------------------------------------
1240
1241 # regex matching identifiers
1242 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1243
1244 # -----------------------------------------------------------------------------
1245 # class Production:
1246 #
1247 # This class stores the raw information about a single production or grammar rule.
1248 # A grammar rule refers to a specification such as this:
1249 #
1250 #       expr : expr PLUS term
1251 #
1252 # Here are the basic attributes defined on all productions
1253 #
1254 #       name     - Name of the production.  For example 'expr'
1255 #       prod     - A list of symbols on the right side ['expr','PLUS','term']
1256 #       prec     - Production precedence level
1257 #       number   - Production number.
1258 #       func     - Function that executes on reduce
1259 #       file     - File where production function is defined
1260 #       lineno   - Line number where production function is defined
1261 #
1262 # The following attributes are defined or optional.
1263 #
1264 #       len       - Length of the production (number of symbols on right hand side)
1265 #       usyms     - Set of unique symbols found in the production
1266 # -----------------------------------------------------------------------------
1267
1268 class Production(object):
1269     reduced = 0
1270     def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
1271         self.name     = name
1272         self.prod     = tuple(prod)
1273         self.number   = number
1274         self.func     = func
1275         self.callable = None
1276         self.file     = file
1277         self.line     = line
1278         self.prec     = precedence
1279
1280         # Internal settings used during table construction
1281
1282         self.len  = len(self.prod)   # Length of the production
1283
1284         # Create a list of unique production symbols used in the production
1285         self.usyms = []
1286         for s in self.prod:
1287             if s not in self.usyms:
1288                 self.usyms.append(s)
1289
1290         # List of all LR items for the production
1291         self.lr_items = []
1292         self.lr_next = None
1293
1294         # Create a string representation
1295         if self.prod:
1296             self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
1297         else:
1298             self.str = '%s -> <empty>' % self.name
1299
1300     def __str__(self):
1301         return self.str
1302
1303     def __repr__(self):
1304         return 'Production(' + str(self) + ')'
1305
1306     def __len__(self):
1307         return len(self.prod)
1308
1309     def __nonzero__(self):
1310         return 1
1311
1312     def __getitem__(self, index):
1313         return self.prod[index]
1314
1315     # Return the nth lr_item from the production (or None if at the end)
1316     def lr_item(self, n):
1317         if n > len(self.prod):
1318             return None
1319         p = LRItem(self, n)
1320         # Precompute the list of productions immediately following.
1321         try:
1322             p.lr_after = self.Prodnames[p.prod[n+1]]
1323         except (IndexError, KeyError):
1324             p.lr_after = []
1325         try:
1326             p.lr_before = p.prod[n-1]
1327         except IndexError:
1328             p.lr_before = None
1329         return p
1330
1331     # Bind the production function name to a callable
1332     def bind(self, pdict):
1333         if self.func:
1334             self.callable = pdict[self.func]
1335
1336 # This class serves as a minimal standin for Production objects when
1337 # reading table data from files.   It only contains information
1338 # actually used by the LR parsing engine, plus some additional
1339 # debugging information.
1340 class MiniProduction(object):
1341     def __init__(self, str, name, len, func, file, line):
1342         self.name     = name
1343         self.len      = len
1344         self.func     = func
1345         self.callable = None
1346         self.file     = file
1347         self.line     = line
1348         self.str      = str
1349
1350     def __str__(self):
1351         return self.str
1352
1353     def __repr__(self):
1354         return 'MiniProduction(%s)' % self.str
1355
1356     # Bind the production function name to a callable
1357     def bind(self, pdict):
1358         if self.func:
1359             self.callable = pdict[self.func]
1360
1361
1362 # -----------------------------------------------------------------------------
1363 # class LRItem
1364 #
1365 # This class represents a specific stage of parsing a production rule.  For
1366 # example:
1367 #
1368 #       expr : expr . PLUS term
1369 #
1370 # In the above, the "." represents the current location of the parse.  Here
1371 # basic attributes:
1372 #
1373 #       name       - Name of the production.  For example 'expr'
1374 #       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
1375 #       number     - Production number.
1376 #
1377 #       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
1378 #                    then lr_next refers to 'expr -> expr PLUS . term'
1379 #       lr_index   - LR item index (location of the ".") in the prod list.
1380 #       lookaheads - LALR lookahead symbols for this item
1381 #       len        - Length of the production (number of symbols on right hand side)
1382 #       lr_after    - List of all productions that immediately follow
1383 #       lr_before   - Grammar symbol immediately before
1384 # -----------------------------------------------------------------------------
1385
1386 class LRItem(object):
1387     def __init__(self, p, n):
1388         self.name       = p.name
1389         self.prod       = list(p.prod)
1390         self.number     = p.number
1391         self.lr_index   = n
1392         self.lookaheads = {}
1393         self.prod.insert(n, '.')
1394         self.prod       = tuple(self.prod)
1395         self.len        = len(self.prod)
1396         self.usyms      = p.usyms
1397
1398     def __str__(self):
1399         if self.prod:
1400             s = '%s -> %s' % (self.name, ' '.join(self.prod))
1401         else:
1402             s = '%s -> <empty>' % self.name
1403         return s
1404
1405     def __repr__(self):
1406         return 'LRItem(' + str(self) + ')'
1407
1408 # -----------------------------------------------------------------------------
1409 # rightmost_terminal()
1410 #
1411 # Return the rightmost terminal from a list of symbols.  Used in add_production()
1412 # -----------------------------------------------------------------------------
1413 def rightmost_terminal(symbols, terminals):
1414     i = len(symbols) - 1
1415     while i >= 0:
1416         if symbols[i] in terminals:
1417             return symbols[i]
1418         i -= 1
1419     return None
1420
1421 # -----------------------------------------------------------------------------
1422 #                           === GRAMMAR CLASS ===
1423 #
1424 # The following class represents the contents of the specified grammar along
1425 # with various computed properties such as first sets, follow sets, LR items, etc.
1426 # This data is used for critical parts of the table generation process later.
1427 # -----------------------------------------------------------------------------
1428
1429 class GrammarError(YaccError):
1430     pass
1431
1432 class Grammar(object):
1433     def __init__(self, terminals):
1434         self.Productions  = [None]  # A list of all of the productions.  The first
1435                                     # entry is always reserved for the purpose of
1436                                     # building an augmented grammar
1437
1438         self.Prodnames    = {}      # A dictionary mapping the names of nonterminals to a list of all
1439                                     # productions of that nonterminal.
1440
1441         self.Prodmap      = {}      # A dictionary that is only used to detect duplicate
1442                                     # productions.
1443
1444         self.Terminals    = {}      # A dictionary mapping the names of terminal symbols to a
1445                                     # list of the rules where they are used.
1446
1447         for term in terminals:
1448             self.Terminals[term] = []
1449
1450         self.Terminals['error'] = []
1451
1452         self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
1453                                     # of rule numbers where they are used.
1454
1455         self.First        = {}      # A dictionary of precomputed FIRST(x) symbols
1456
1457         self.Follow       = {}      # A dictionary of precomputed FOLLOW(x) symbols
1458
1459         self.Precedence   = {}      # Precedence rules for each terminal. Contains tuples of the
1460                                     # form ('right',level) or ('nonassoc', level) or ('left',level)
1461
1462         self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
1463                                     # This is only used to provide error checking and to generate
1464                                     # a warning about unused precedence rules.
1465
1466         self.Start = None           # Starting symbol for the grammar
1467
1468
1469     def __len__(self):
1470         return len(self.Productions)
1471
1472     def __getitem__(self, index):
1473         return self.Productions[index]
1474
1475     # -----------------------------------------------------------------------------
1476     # set_precedence()
1477     #
1478     # Sets the precedence for a given terminal. assoc is the associativity such as
1479     # 'left','right', or 'nonassoc'.  level is a numeric level.
1480     #
1481     # -----------------------------------------------------------------------------
1482
1483     def set_precedence(self, term, assoc, level):
1484         assert self.Productions == [None], 'Must call set_precedence() before add_production()'
1485         if term in self.Precedence:
1486             raise GrammarError('Precedence already specified for terminal %r' % term)
1487         if assoc not in ['left', 'right', 'nonassoc']:
1488             raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1489         self.Precedence[term] = (assoc, level)
1490
1491     # -----------------------------------------------------------------------------
1492     # add_production()
1493     #
1494     # Given an action function, this function assembles a production rule and
1495     # computes its precedence level.
1496     #
1497     # The production rule is supplied as a list of symbols.   For example,
1498     # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1499     # symbols ['expr','PLUS','term'].
1500     #
1501     # Precedence is determined by the precedence of the right-most non-terminal
1502     # or the precedence of a terminal specified by %prec.
1503     #
1504     # A variety of error checks are performed to make sure production symbols
1505     # are valid and that %prec is used correctly.
1506     # -----------------------------------------------------------------------------
1507
1508     def add_production(self, prodname, syms, func=None, file='', line=0):
1509
1510         if prodname in self.Terminals:
1511             raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
1512         if prodname == 'error':
1513             raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
1514         if not _is_identifier.match(prodname):
1515             raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
1516
1517         # Look for literal tokens
1518         for n, s in enumerate(syms):
1519             if s[0] in "'\"":
1520                 try:
1521                     c = eval(s)
1522                     if (len(c) > 1):
1523                         raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
1524                                            (file, line, s, prodname))
1525                     if c not in self.Terminals:
1526                         self.Terminals[c] = []
1527                     syms[n] = c
1528                     continue
1529                 except SyntaxError:
1530                     pass
1531             if not _is_identifier.match(s) and s != '%prec':
1532                 raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
1533
1534         # Determine the precedence level
1535         if '%prec' in syms:
1536             if syms[-1] == '%prec':
1537                 raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
1538             if syms[-2] != '%prec':
1539                 raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
1540                                    (file, line))
1541             precname = syms[-1]
1542             prodprec = self.Precedence.get(precname)
1543             if not prodprec:
1544                 raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
1545             else:
1546                 self.UsedPrecedence.add(precname)
1547             del syms[-2:]     # Drop %prec from the rule
1548         else:
1549             # If no %prec, precedence is determined by the rightmost terminal symbol
1550             precname = rightmost_terminal(syms, self.Terminals)
1551             prodprec = self.Precedence.get(precname, ('right', 0))
1552
1553         # See if the rule is already in the rulemap
1554         map = '%s -> %s' % (prodname, syms)
1555         if map in self.Prodmap:
1556             m = self.Prodmap[map]
1557             raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
1558                                'Previous definition at %s:%d' % (m.file, m.line))
1559
1560         # From this point on, everything is valid.  Create a new Production instance
1561         pnumber  = len(self.Productions)
1562         if prodname not in self.Nonterminals:
1563             self.Nonterminals[prodname] = []
1564
1565         # Add the production number to Terminals and Nonterminals
1566         for t in syms:
1567             if t in self.Terminals:
1568                 self.Terminals[t].append(pnumber)
1569             else:
1570                 if t not in self.Nonterminals:
1571                     self.Nonterminals[t] = []
1572                 self.Nonterminals[t].append(pnumber)
1573
1574         # Create a production and add it to the list of productions
1575         p = Production(pnumber, prodname, syms, prodprec, func, file, line)
1576         self.Productions.append(p)
1577         self.Prodmap[map] = p
1578
1579         # Add to the global productions list
1580         try:
1581             self.Prodnames[prodname].append(p)
1582         except KeyError:
1583             self.Prodnames[prodname] = [p]
1584
1585     # -----------------------------------------------------------------------------
1586     # set_start()
1587     #
1588     # Sets the starting symbol and creates the augmented grammar.  Production
1589     # rule 0 is S' -> start where start is the start symbol.
1590     # -----------------------------------------------------------------------------
1591
1592     def set_start(self, start=None):
1593         if not start:
1594             start = self.Productions[1].name
1595         if start not in self.Nonterminals:
1596             raise GrammarError('start symbol %s undefined' % start)
1597         self.Productions[0] = Production(0, "S'", [start])
1598         self.Nonterminals[start].append(0)
1599         self.Start = start
1600
1601     # -----------------------------------------------------------------------------
1602     # find_unreachable()
1603     #
1604     # Find all of the nonterminal symbols that can't be reached from the starting
1605     # symbol.  Returns a list of nonterminals that can't be reached.
1606     # -----------------------------------------------------------------------------
1607
1608     def find_unreachable(self):
1609
1610         # Mark all symbols that are reachable from a symbol s
1611         def mark_reachable_from(s):
1612             if s in reachable:
1613                 return
1614             reachable.add(s)
1615             for p in self.Prodnames.get(s, []):
1616                 for r in p.prod:
1617                     mark_reachable_from(r)
1618
1619         reachable = set()
1620         mark_reachable_from(self.Productions[0].prod[0])
1621         return [s for s in self.Nonterminals if s not in reachable]
1622
1623     # -----------------------------------------------------------------------------
1624     # infinite_cycles()
1625     #
1626     # This function looks at the various parsing rules and tries to detect
1627     # infinite recursion cycles (grammar rules where there is no possible way
1628     # to derive a string of only terminals).
1629     # -----------------------------------------------------------------------------
1630
1631     def infinite_cycles(self):
1632         terminates = {}
1633
1634         # Terminals:
1635         for t in self.Terminals:
1636             terminates[t] = True
1637
1638         terminates['$end'] = True
1639
1640         # Nonterminals:
1641
1642         # Initialize to false:
1643         for n in self.Nonterminals:
1644             terminates[n] = False
1645
1646         # Then propagate termination until no change:
1647         while True:
1648             some_change = False
1649             for (n, pl) in self.Prodnames.items():
1650                 # Nonterminal n terminates iff any of its productions terminates.
1651                 for p in pl:
1652                     # Production p terminates iff all of its rhs symbols terminate.
1653                     for s in p.prod:
1654                         if not terminates[s]:
1655                             # The symbol s does not terminate,
1656                             # so production p does not terminate.
1657                             p_terminates = False
1658                             break
1659                     else:
1660                         # didn't break from the loop,
1661                         # so every symbol s terminates
1662                         # so production p terminates.
1663                         p_terminates = True
1664
1665                     if p_terminates:
1666                         # symbol n terminates!
1667                         if not terminates[n]:
1668                             terminates[n] = True
1669                             some_change = True
1670                         # Don't need to consider any more productions for this n.
1671                         break
1672
1673             if not some_change:
1674                 break
1675
1676         infinite = []
1677         for (s, term) in terminates.items():
1678             if not term:
1679                 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1680                     # s is used-but-not-defined, and we've already warned of that,
1681                     # so it would be overkill to say that it's also non-terminating.
1682                     pass
1683                 else:
1684                     infinite.append(s)
1685
1686         return infinite
1687
1688     # -----------------------------------------------------------------------------
1689     # undefined_symbols()
1690     #
1691     # Find all symbols that were used the grammar, but not defined as tokens or
1692     # grammar rules.  Returns a list of tuples (sym, prod) where sym in the symbol
1693     # and prod is the production where the symbol was used.
1694     # -----------------------------------------------------------------------------
1695     def undefined_symbols(self):
1696         result = []
1697         for p in self.Productions:
1698             if not p:
1699                 continue
1700
1701             for s in p.prod:
1702                 if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1703                     result.append((s, p))
1704         return result
1705
1706     # -----------------------------------------------------------------------------
1707     # unused_terminals()
1708     #
1709     # Find all terminals that were defined, but not used by the grammar.  Returns
1710     # a list of all symbols.
1711     # -----------------------------------------------------------------------------
1712     def unused_terminals(self):
1713         unused_tok = []
1714         for s, v in self.Terminals.items():
1715             if s != 'error' and not v:
1716                 unused_tok.append(s)
1717
1718         return unused_tok
1719
1720     # ------------------------------------------------------------------------------
1721     # unused_rules()
1722     #
1723     # Find all grammar rules that were defined,  but not used (maybe not reachable)
1724     # Returns a list of productions.
1725     # ------------------------------------------------------------------------------
1726
1727     def unused_rules(self):
1728         unused_prod = []
1729         for s, v in self.Nonterminals.items():
1730             if not v:
1731                 p = self.Prodnames[s][0]
1732                 unused_prod.append(p)
1733         return unused_prod
1734
1735     # -----------------------------------------------------------------------------
1736     # unused_precedence()
1737     #
1738     # Returns a list of tuples (term,precedence) corresponding to precedence
1739     # rules that were never used by the grammar.  term is the name of the terminal
1740     # on which precedence was applied and precedence is a string such as 'left' or
1741     # 'right' corresponding to the type of precedence.
1742     # -----------------------------------------------------------------------------
1743
1744     def unused_precedence(self):
1745         unused = []
1746         for termname in self.Precedence:
1747             if not (termname in self.Terminals or termname in self.UsedPrecedence):
1748                 unused.append((termname, self.Precedence[termname][0]))
1749
1750         return unused
1751
1752     # -------------------------------------------------------------------------
1753     # _first()
1754     #
1755     # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1756     #
1757     # During execution of compute_first1, the result may be incomplete.
1758     # Afterward (e.g., when called from compute_follow()), it will be complete.
1759     # -------------------------------------------------------------------------
1760     def _first(self, beta):
1761
1762         # We are computing First(x1,x2,x3,...,xn)
1763         result = []
1764         for x in beta:
1765             x_produces_empty = False
1766
1767             # Add all the non-<empty> symbols of First[x] to the result.
1768             for f in self.First[x]:
1769                 if f == '<empty>':
1770                     x_produces_empty = True
1771                 else:
1772                     if f not in result:
1773                         result.append(f)
1774
1775             if x_produces_empty:
1776                 # We have to consider the next x in beta,
1777                 # i.e. stay in the loop.
1778                 pass
1779             else:
1780                 # We don't have to consider any further symbols in beta.
1781                 break
1782         else:
1783             # There was no 'break' from the loop,
1784             # so x_produces_empty was true for all x in beta,
1785             # so beta produces empty as well.
1786             result.append('<empty>')
1787
1788         return result
1789
1790     # -------------------------------------------------------------------------
1791     # compute_first()
1792     #
1793     # Compute the value of FIRST1(X) for all symbols
1794     # -------------------------------------------------------------------------
1795     def compute_first(self):
1796         if self.First:
1797             return self.First
1798
1799         # Terminals:
1800         for t in self.Terminals:
1801             self.First[t] = [t]
1802
1803         self.First['$end'] = ['$end']
1804
1805         # Nonterminals:
1806
1807         # Initialize to the empty set:
1808         for n in self.Nonterminals:
1809             self.First[n] = []
1810
1811         # Then propagate symbols until no change:
1812         while True:
1813             some_change = False
1814             for n in self.Nonterminals:
1815                 for p in self.Prodnames[n]:
1816                     for f in self._first(p.prod):
1817                         if f not in self.First[n]:
1818                             self.First[n].append(f)
1819                             some_change = True
1820             if not some_change:
1821                 break
1822
1823         return self.First
1824
1825     # ---------------------------------------------------------------------
1826     # compute_follow()
1827     #
1828     # Computes all of the follow sets for every non-terminal symbol.  The
1829     # follow set is the set of all symbols that might follow a given
1830     # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
1831     # ---------------------------------------------------------------------
1832     def compute_follow(self, start=None):
1833         # If already computed, return the result
1834         if self.Follow:
1835             return self.Follow
1836
1837         # If first sets not computed yet, do that first.
1838         if not self.First:
1839             self.compute_first()
1840
1841         # Add '$end' to the follow list of the start symbol
1842         for k in self.Nonterminals:
1843             self.Follow[k] = []
1844
1845         if not start:
1846             start = self.Productions[1].name
1847
1848         self.Follow[start] = ['$end']
1849
1850         while True:
1851             didadd = False
1852             for p in self.Productions[1:]:
1853                 # Here is the production set
1854                 for i, B in enumerate(p.prod):
1855                     if B in self.Nonterminals:
1856                         # Okay. We got a non-terminal in a production
1857                         fst = self._first(p.prod[i+1:])
1858                         hasempty = False
1859                         for f in fst:
1860                             if f != '<empty>' and f not in self.Follow[B]:
1861                                 self.Follow[B].append(f)
1862                                 didadd = True
1863                             if f == '<empty>':
1864                                 hasempty = True
1865                         if hasempty or i == (len(p.prod)-1):
1866                             # Add elements of follow(a) to follow(b)
1867                             for f in self.Follow[p.name]:
1868                                 if f not in self.Follow[B]:
1869                                     self.Follow[B].append(f)
1870                                     didadd = True
1871             if not didadd:
1872                 break
1873         return self.Follow
1874
1875
1876     # -----------------------------------------------------------------------------
1877     # build_lritems()
1878     #
1879     # This function walks the list of productions and builds a complete set of the
1880     # LR items.  The LR items are stored in two ways:  First, they are uniquely
1881     # numbered and placed in the list _lritems.  Second, a linked list of LR items
1882     # is built for each production.  For example:
1883     #
1884     #   E -> E PLUS E
1885     #
1886     # Creates the list
1887     #
1888     #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1889     # -----------------------------------------------------------------------------
1890
1891     def build_lritems(self):
1892         for p in self.Productions:
1893             lastlri = p
1894             i = 0
1895             lr_items = []
1896             while True:
1897                 if i > len(p):
1898                     lri = None
1899                 else:
1900                     lri = LRItem(p, i)
1901                     # Precompute the list of productions immediately following
1902                     try:
1903                         lri.lr_after = self.Prodnames[lri.prod[i+1]]
1904                     except (IndexError, KeyError):
1905                         lri.lr_after = []
1906                     try:
1907                         lri.lr_before = lri.prod[i-1]
1908                     except IndexError:
1909                         lri.lr_before = None
1910
1911                 lastlri.lr_next = lri
1912                 if not lri:
1913                     break
1914                 lr_items.append(lri)
1915                 lastlri = lri
1916                 i += 1
1917             p.lr_items = lr_items
1918
1919 # -----------------------------------------------------------------------------
1920 #                            == Class LRTable ==
1921 #
1922 # This basic class represents a basic table of LR parsing information.
1923 # Methods for generating the tables are not defined here.  They are defined
1924 # in the derived class LRGeneratedTable.
1925 # -----------------------------------------------------------------------------
1926
1927 class VersionError(YaccError):
1928     pass
1929
1930 class LRTable(object):
1931     def __init__(self):
1932         self.lr_action = None
1933         self.lr_goto = None
1934         self.lr_productions = None
1935         self.lr_method = None
1936
1937     def read_table(self, module):
1938         if isinstance(module, types.ModuleType):
1939             parsetab = module
1940         else:
1941             exec('import %s' % module)
1942             parsetab = sys.modules[module]
1943
1944         if parsetab._tabversion != __tabversion__:
1945             raise VersionError('yacc table file version is out of date')
1946
1947         self.lr_action = parsetab._lr_action
1948         self.lr_goto = parsetab._lr_goto
1949
1950         self.lr_productions = []
1951         for p in parsetab._lr_productions:
1952             self.lr_productions.append(MiniProduction(*p))
1953
1954         self.lr_method = parsetab._lr_method
1955         return parsetab._lr_signature
1956
1957     def read_pickle(self, filename):
1958         try:
1959             import cPickle as pickle
1960         except ImportError:
1961             import pickle
1962
1963         if not os.path.exists(filename):
1964           raise ImportError
1965
1966         in_f = open(filename, 'rb')
1967
1968         tabversion = pickle.load(in_f)
1969         if tabversion != __tabversion__:
1970             raise VersionError('yacc table file version is out of date')
1971         self.lr_method = pickle.load(in_f)
1972         signature      = pickle.load(in_f)
1973         self.lr_action = pickle.load(in_f)
1974         self.lr_goto   = pickle.load(in_f)
1975         productions    = pickle.load(in_f)
1976
1977         self.lr_productions = []
1978         for p in productions:
1979             self.lr_productions.append(MiniProduction(*p))
1980
1981         in_f.close()
1982         return signature
1983
1984     # Bind all production function names to callable objects in pdict
1985     def bind_callables(self, pdict):
1986         for p in self.lr_productions:
1987             p.bind(pdict)
1988
1989
1990 # -----------------------------------------------------------------------------
1991 #                           === LR Generator ===
1992 #
1993 # The following classes and functions are used to generate LR parsing tables on
1994 # a grammar.
1995 # -----------------------------------------------------------------------------
1996
1997 # -----------------------------------------------------------------------------
1998 # digraph()
1999 # traverse()
2000 #
2001 # The following two functions are used to compute set valued functions
2002 # of the form:
2003 #
2004 #     F(x) = F'(x) U U{F(y) | x R y}
2005 #
2006 # This is used to compute the values of Read() sets as well as FOLLOW sets
2007 # in LALR(1) generation.
2008 #
2009 # Inputs:  X    - An input set
2010 #          R    - A relation
2011 #          FP   - Set-valued function
2012 # ------------------------------------------------------------------------------
2013
2014 def digraph(X, R, FP):
2015     N = {}
2016     for x in X:
2017         N[x] = 0
2018     stack = []
2019     F = {}
2020     for x in X:
2021         if N[x] == 0:
2022             traverse(x, N, stack, F, X, R, FP)
2023     return F
2024
2025 def traverse(x, N, stack, F, X, R, FP):
2026     stack.append(x)
2027     d = len(stack)
2028     N[x] = d
2029     F[x] = FP(x)             # F(X) <- F'(x)
2030
2031     rel = R(x)               # Get y's related to x
2032     for y in rel:
2033         if N[y] == 0:
2034             traverse(y, N, stack, F, X, R, FP)
2035         N[x] = min(N[x], N[y])
2036         for a in F.get(y, []):
2037             if a not in F[x]:
2038                 F[x].append(a)
2039     if N[x] == d:
2040         N[stack[-1]] = MAXINT
2041         F[stack[-1]] = F[x]
2042         element = stack.pop()
2043         while element != x:
2044             N[stack[-1]] = MAXINT
2045             F[stack[-1]] = F[x]
2046             element = stack.pop()
2047
2048 class LALRError(YaccError):
2049     pass
2050
2051 # -----------------------------------------------------------------------------
2052 #                             == LRGeneratedTable ==
2053 #
2054 # This class implements the LR table generation algorithm.  There are no
2055 # public methods except for write()
2056 # -----------------------------------------------------------------------------
2057
2058 class LRGeneratedTable(LRTable):
2059     def __init__(self, grammar, method='LALR', log=None):
2060         if method not in ['SLR', 'LALR']:
2061             raise LALRError('Unsupported method %s' % method)
2062
2063         self.grammar = grammar
2064         self.lr_method = method
2065
2066         # Set up the logger
2067         if not log:
2068             log = NullLogger()
2069         self.log = log
2070
2071         # Internal attributes
2072         self.lr_action     = {}        # Action table
2073         self.lr_goto       = {}        # Goto table
2074         self.lr_productions  = grammar.Productions    # Copy of grammar Production array
2075         self.lr_goto_cache = {}        # Cache of computed gotos
2076         self.lr0_cidhash   = {}        # Cache of closures
2077
2078         self._add_count    = 0         # Internal counter used to detect cycles
2079
2080         # Diagonistic information filled in by the table generator
2081         self.sr_conflict   = 0
2082         self.rr_conflict   = 0
2083         self.conflicts     = []        # List of conflicts
2084
2085         self.sr_conflicts  = []
2086         self.rr_conflicts  = []
2087
2088         # Build the tables
2089         self.grammar.build_lritems()
2090         self.grammar.compute_first()
2091         self.grammar.compute_follow()
2092         self.lr_parse_table()
2093
2094     # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2095
2096     def lr0_closure(self, I):
2097         self._add_count += 1
2098
2099         # Add everything in I to J
2100         J = I[:]
2101         didadd = True
2102         while didadd:
2103             didadd = False
2104             for j in J:
2105                 for x in j.lr_after:
2106                     if getattr(x, 'lr0_added', 0) == self._add_count:
2107                         continue
2108                     # Add B --> .G to J
2109                     J.append(x.lr_next)
2110                     x.lr0_added = self._add_count
2111                     didadd = True
2112
2113         return J
2114
2115     # Compute the LR(0) goto function goto(I,X) where I is a set
2116     # of LR(0) items and X is a grammar symbol.   This function is written
2117     # in a way that guarantees uniqueness of the generated goto sets
2118     # (i.e. the same goto set will never be returned as two different Python
2119     # objects).  With uniqueness, we can later do fast set comparisons using
2120     # id(obj) instead of element-wise comparison.
2121
2122     def lr0_goto(self, I, x):
2123         # First we look for a previously cached entry
2124         g = self.lr_goto_cache.get((id(I), x))
2125         if g:
2126             return g
2127
2128         # Now we generate the goto set in a way that guarantees uniqueness
2129         # of the result
2130
2131         s = self.lr_goto_cache.get(x)
2132         if not s:
2133             s = {}
2134             self.lr_goto_cache[x] = s
2135
2136         gs = []
2137         for p in I:
2138             n = p.lr_next
2139             if n and n.lr_before == x:
2140                 s1 = s.get(id(n))
2141                 if not s1:
2142                     s1 = {}
2143                     s[id(n)] = s1
2144                 gs.append(n)
2145                 s = s1
2146         g = s.get('$end')
2147         if not g:
2148             if gs:
2149                 g = self.lr0_closure(gs)
2150                 s['$end'] = g
2151             else:
2152                 s['$end'] = gs
2153         self.lr_goto_cache[(id(I), x)] = g
2154         return g
2155
2156     # Compute the LR(0) sets of item function
2157     def lr0_items(self):
2158         C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2159         i = 0
2160         for I in C:
2161             self.lr0_cidhash[id(I)] = i
2162             i += 1
2163
2164         # Loop over the items in C and each grammar symbols
2165         i = 0
2166         while i < len(C):
2167             I = C[i]
2168             i += 1
2169
2170             # Collect all of the symbols that could possibly be in the goto(I,X) sets
2171             asyms = {}
2172             for ii in I:
2173                 for s in ii.usyms:
2174                     asyms[s] = None
2175
2176             for x in asyms:
2177                 g = self.lr0_goto(I, x)
2178                 if not g or id(g) in self.lr0_cidhash:
2179                     continue
2180                 self.lr0_cidhash[id(g)] = len(C)
2181                 C.append(g)
2182
2183         return C
2184
2185     # -----------------------------------------------------------------------------
2186     #                       ==== LALR(1) Parsing ====
2187     #
2188     # LALR(1) parsing is almost exactly the same as SLR except that instead of
2189     # relying upon Follow() sets when performing reductions, a more selective
2190     # lookahead set that incorporates the state of the LR(0) machine is utilized.
2191     # Thus, we mainly just have to focus on calculating the lookahead sets.
2192     #
2193     # The method used here is due to DeRemer and Pennelo (1982).
2194     #
2195     # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
2196     #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2197     #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
2198     #
2199     # Further details can also be found in:
2200     #
2201     #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2202     #      McGraw-Hill Book Company, (1985).
2203     #
2204     # -----------------------------------------------------------------------------
2205
2206     # -----------------------------------------------------------------------------
2207     # compute_nullable_nonterminals()
2208     #
2209     # Creates a dictionary containing all of the non-terminals that might produce
2210     # an empty production.
2211     # -----------------------------------------------------------------------------
2212
2213     def compute_nullable_nonterminals(self):
2214         nullable = set()
2215         num_nullable = 0
2216         while True:
2217             for p in self.grammar.Productions[1:]:
2218                 if p.len == 0:
2219                     nullable.add(p.name)
2220                     continue
2221                 for t in p.prod:
2222                     if t not in nullable:
2223                         break
2224                 else:
2225                     nullable.add(p.name)
2226             if len(nullable) == num_nullable:
2227                 break
2228             num_nullable = len(nullable)
2229         return nullable
2230
2231     # -----------------------------------------------------------------------------
2232     # find_nonterminal_trans(C)
2233     #
2234     # Given a set of LR(0) items, this functions finds all of the non-terminal
2235     # transitions.    These are transitions in which a dot appears immediately before
2236     # a non-terminal.   Returns a list of tuples of the form (state,N) where state
2237     # is the state number and N is the nonterminal symbol.
2238     #
2239     # The input C is the set of LR(0) items.
2240     # -----------------------------------------------------------------------------
2241
2242     def find_nonterminal_transitions(self, C):
2243         trans = []
2244         for stateno, state in enumerate(C):
2245             for p in state:
2246                 if p.lr_index < p.len - 1:
2247                     t = (stateno, p.prod[p.lr_index+1])
2248                     if t[1] in self.grammar.Nonterminals:
2249                         if t not in trans:
2250                             trans.append(t)
2251         return trans
2252
2253     # -----------------------------------------------------------------------------
2254     # dr_relation()
2255     #
2256     # Computes the DR(p,A) relationships for non-terminal transitions.  The input
2257     # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2258     #
2259     # Returns a list of terminals.
2260     # -----------------------------------------------------------------------------
2261
2262     def dr_relation(self, C, trans, nullable):
2263         state, N = trans
2264         terms = []
2265
2266         g = self.lr0_goto(C[state], N)
2267         for p in g:
2268             if p.lr_index < p.len - 1:
2269                 a = p.prod[p.lr_index+1]
2270                 if a in self.grammar.Terminals:
2271                     if a not in terms:
2272                         terms.append(a)
2273
2274         # This extra bit is to handle the start state
2275         if state == 0 and N == self.grammar.Productions[0].prod[0]:
2276             terms.append('$end')
2277
2278         return terms
2279
2280     # -----------------------------------------------------------------------------
2281     # reads_relation()
2282     #
2283     # Computes the READS() relation (p,A) READS (t,C).
2284     # -----------------------------------------------------------------------------
2285
2286     def reads_relation(self, C, trans, empty):
2287         # Look for empty transitions
2288         rel = []
2289         state, N = trans
2290
2291         g = self.lr0_goto(C[state], N)
2292         j = self.lr0_cidhash.get(id(g), -1)
2293         for p in g:
2294             if p.lr_index < p.len - 1:
2295                 a = p.prod[p.lr_index + 1]
2296                 if a in empty:
2297                     rel.append((j, a))
2298
2299         return rel
2300
2301     # -----------------------------------------------------------------------------
2302     # compute_lookback_includes()
2303     #
2304     # Determines the lookback and includes relations
2305     #
2306     # LOOKBACK:
2307     #
2308     # This relation is determined by running the LR(0) state machine forward.
2309     # For example, starting with a production "N : . A B C", we run it forward
2310     # to obtain "N : A B C ."   We then build a relationship between this final
2311     # state and the starting state.   These relationships are stored in a dictionary
2312     # lookdict.
2313     #
2314     # INCLUDES:
2315     #
2316     # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2317     #
2318     # This relation is used to determine non-terminal transitions that occur
2319     # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
2320     # if the following holds:
2321     #
2322     #       B -> LAT, where T -> epsilon and p' -L-> p
2323     #
2324     # L is essentially a prefix (which may be empty), T is a suffix that must be
2325     # able to derive an empty string.  State p' must lead to state p with the string L.
2326     #
2327     # -----------------------------------------------------------------------------
2328
2329     def compute_lookback_includes(self, C, trans, nullable):
2330         lookdict = {}          # Dictionary of lookback relations
2331         includedict = {}       # Dictionary of include relations
2332
2333         # Make a dictionary of non-terminal transitions
2334         dtrans = {}
2335         for t in trans:
2336             dtrans[t] = 1
2337
2338         # Loop over all transitions and compute lookbacks and includes
2339         for state, N in trans:
2340             lookb = []
2341             includes = []
2342             for p in C[state]:
2343                 if p.name != N:
2344                     continue
2345
2346                 # Okay, we have a name match.  We now follow the production all the way
2347                 # through the state machine until we get the . on the right hand side
2348
2349                 lr_index = p.lr_index
2350                 j = state
2351                 while lr_index < p.len - 1:
2352                     lr_index = lr_index + 1
2353                     t = p.prod[lr_index]
2354
2355                     # Check to see if this symbol and state are a non-terminal transition
2356                     if (j, t) in dtrans:
2357                         # Yes.  Okay, there is some chance that this is an includes relation
2358                         # the only way to know for certain is whether the rest of the
2359                         # production derives empty
2360
2361                         li = lr_index + 1
2362                         while li < p.len:
2363                             if p.prod[li] in self.grammar.Terminals:
2364                                 break      # No forget it
2365                             if p.prod[li] not in nullable:
2366                                 break
2367                             li = li + 1
2368                         else:
2369                             # Appears to be a relation between (j,t) and (state,N)
2370                             includes.append((j, t))
2371
2372                     g = self.lr0_goto(C[j], t)               # Go to next set
2373                     j = self.lr0_cidhash.get(id(g), -1)      # Go to next state
2374
2375                 # When we get here, j is the final state, now we have to locate the production
2376                 for r in C[j]:
2377                     if r.name != p.name:
2378                         continue
2379                     if r.len != p.len:
2380                         continue
2381                     i = 0
2382                     # This look is comparing a production ". A B C" with "A B C ."
2383                     while i < r.lr_index:
2384                         if r.prod[i] != p.prod[i+1]:
2385                             break
2386                         i = i + 1
2387                     else:
2388                         lookb.append((j, r))
2389             for i in includes:
2390                 if i not in includedict:
2391                     includedict[i] = []
2392                 includedict[i].append((state, N))
2393             lookdict[(state, N)] = lookb
2394
2395         return lookdict, includedict
2396
2397     # -----------------------------------------------------------------------------
2398     # compute_read_sets()
2399     #
2400     # Given a set of LR(0) items, this function computes the read sets.
2401     #
2402     # Inputs:  C        =  Set of LR(0) items
2403     #          ntrans   = Set of nonterminal transitions
2404     #          nullable = Set of empty transitions
2405     #
2406     # Returns a set containing the read sets
2407     # -----------------------------------------------------------------------------
2408
2409     def compute_read_sets(self, C, ntrans, nullable):
2410         FP = lambda x: self.dr_relation(C, x, nullable)
2411         R =  lambda x: self.reads_relation(C, x, nullable)
2412         F = digraph(ntrans, R, FP)
2413         return F
2414
2415     # -----------------------------------------------------------------------------
2416     # compute_follow_sets()
2417     #
2418     # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2419     # and an include set, this function computes the follow sets
2420     #
2421     # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2422     #
2423     # Inputs:
2424     #            ntrans     = Set of nonterminal transitions
2425     #            readsets   = Readset (previously computed)
2426     #            inclsets   = Include sets (previously computed)
2427     #
2428     # Returns a set containing the follow sets
2429     # -----------------------------------------------------------------------------
2430
2431     def compute_follow_sets(self, ntrans, readsets, inclsets):
2432         FP = lambda x: readsets[x]
2433         R  = lambda x: inclsets.get(x, [])
2434         F = digraph(ntrans, R, FP)
2435         return F
2436
2437     # -----------------------------------------------------------------------------
2438     # add_lookaheads()
2439     #
2440     # Attaches the lookahead symbols to grammar rules.
2441     #
2442     # Inputs:    lookbacks         -  Set of lookback relations
2443     #            followset         -  Computed follow set
2444     #
2445     # This function directly attaches the lookaheads to productions contained
2446     # in the lookbacks set
2447     # -----------------------------------------------------------------------------
2448
2449     def add_lookaheads(self, lookbacks, followset):
2450         for trans, lb in lookbacks.items():
2451             # Loop over productions in lookback
2452             for state, p in lb:
2453                 if state not in p.lookaheads:
2454                     p.lookaheads[state] = []
2455                 f = followset.get(trans, [])
2456                 for a in f:
2457                     if a not in p.lookaheads[state]:
2458                         p.lookaheads[state].append(a)
2459
2460     # -----------------------------------------------------------------------------
2461     # add_lalr_lookaheads()
2462     #
2463     # This function does all of the work of adding lookahead information for use
2464     # with LALR parsing
2465     # -----------------------------------------------------------------------------
2466
2467     def add_lalr_lookaheads(self, C):
2468         # Determine all of the nullable nonterminals
2469         nullable = self.compute_nullable_nonterminals()
2470
2471         # Find all non-terminal transitions
2472         trans = self.find_nonterminal_transitions(C)
2473
2474         # Compute read sets
2475         readsets = self.compute_read_sets(C, trans, nullable)
2476
2477         # Compute lookback/includes relations
2478         lookd, included = self.compute_lookback_includes(C, trans, nullable)
2479
2480         # Compute LALR FOLLOW sets
2481         followsets = self.compute_follow_sets(trans, readsets, included)
2482
2483         # Add all of the lookaheads
2484         self.add_lookaheads(lookd, followsets)
2485
2486     # -----------------------------------------------------------------------------
2487     # lr_parse_table()
2488     #
2489     # This function constructs the parse tables for SLR or LALR
2490     # -----------------------------------------------------------------------------
2491     def lr_parse_table(self):
2492         Productions = self.grammar.Productions
2493         Precedence  = self.grammar.Precedence
2494         goto   = self.lr_goto         # Goto array
2495         action = self.lr_action       # Action array
2496         log    = self.log             # Logger for output
2497
2498         actionp = {}                  # Action production array (temporary)
2499
2500         log.info('Parsing method: %s', self.lr_method)
2501
2502         # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2503         # This determines the number of states
2504
2505         C = self.lr0_items()
2506
2507         if self.lr_method == 'LALR':
2508             self.add_lalr_lookaheads(C)
2509
2510         # Build the parser table, state by state
2511         st = 0
2512         for I in C:
2513             # Loop over each production in I
2514             actlist = []              # List of actions
2515             st_action  = {}
2516             st_actionp = {}
2517             st_goto    = {}
2518             log.info('')
2519             log.info('state %d', st)
2520             log.info('')
2521             for p in I:
2522                 log.info('    (%d) %s', p.number, p)
2523             log.info('')
2524
2525             for p in I:
2526                     if p.len == p.lr_index + 1:
2527                         if p.name == "S'":
2528                             # Start symbol. Accept!
2529                             st_action['$end'] = 0
2530                             st_actionp['$end'] = p
2531                         else:
2532                             # We are at the end of a production.  Reduce!
2533                             if self.lr_method == 'LALR':
2534                                 laheads = p.lookaheads[st]
2535                             else:
2536                                 laheads = self.grammar.Follow[p.name]
2537                             for a in laheads:
2538                                 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2539                                 r = st_action.get(a)
2540                                 if r is not None:
2541                                     # Whoa. Have a shift/reduce or reduce/reduce conflict
2542                                     if r > 0:
2543                                         # Need to decide on shift or reduce here
2544                                         # By default we favor shifting. Need to add
2545                                         # some precedence rules here.
2546
2547                                         # Shift precedence comes from the token
2548                                         sprec, slevel = Precedence.get(a, ('right', 0))
2549
2550                                         # Reduce precedence comes from rule being reduced (p)
2551                                         rprec, rlevel = Productions[p.number].prec
2552
2553                                         if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2554                                             # We really need to reduce here.
2555                                             st_action[a] = -p.number
2556                                             st_actionp[a] = p
2557                                             if not slevel and not rlevel:
2558                                                 log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
2559                                                 self.sr_conflicts.append((st, a, 'reduce'))
2560                                             Productions[p.number].reduced += 1
2561                                         elif (slevel == rlevel) and (rprec == 'nonassoc'):
2562                                             st_action[a] = None
2563                                         else:
2564                                             # Hmmm. Guess we'll keep the shift
2565                                             if not rlevel:
2566                                                 log.info('  ! shift/reduce conflict for %s resolved as shift', a)
2567                                                 self.sr_conflicts.append((st, a, 'shift'))
2568                                     elif r < 0:
2569                                         # Reduce/reduce conflict.   In this case, we favor the rule
2570                                         # that was defined first in the grammar file
2571                                         oldp = Productions[-r]
2572                                         pp = Productions[p.number]
2573                                         if oldp.line > pp.line:
2574                                             st_action[a] = -p.number
2575                                             st_actionp[a] = p
2576                                             chosenp, rejectp = pp, oldp
2577                                             Productions[p.number].reduced += 1
2578                                             Productions[oldp.number].reduced -= 1
2579                                         else:
2580                                             chosenp, rejectp = oldp, pp
2581                                         self.rr_conflicts.append((st, chosenp, rejectp))
2582                                         log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2583                                                  a, st_actionp[a].number, st_actionp[a])
2584                                     else:
2585                                         raise LALRError('Unknown conflict in state %d' % st)
2586                                 else:
2587                                     st_action[a] = -p.number
2588                                     st_actionp[a] = p
2589                                     Productions[p.number].reduced += 1
2590                     else:
2591                         i = p.lr_index
2592                         a = p.prod[i+1]       # Get symbol right after the "."
2593                         if a in self.grammar.Terminals:
2594                             g = self.lr0_goto(I, a)
2595                             j = self.lr0_cidhash.get(id(g), -1)
2596                             if j >= 0:
2597                                 # We are in a shift state
2598                                 actlist.append((a, p, 'shift and go to state %d' % j))
2599                                 r = st_action.get(a)
2600                                 if r is not None:
2601                                     # Whoa have a shift/reduce or shift/shift conflict
2602                                     if r > 0:
2603                                         if r != j:
2604                                             raise LALRError('Shift/shift conflict in state %d' % st)
2605                                     elif r < 0:
2606                                         # Do a precedence check.
2607                                         #   -  if precedence of reduce rule is higher, we reduce.
2608                                         #   -  if precedence of reduce is same and left assoc, we reduce.
2609                                         #   -  otherwise we shift
2610
2611                                         # Shift precedence comes from the token
2612                                         sprec, slevel = Precedence.get(a, ('right', 0))
2613
2614                                         # Reduce precedence comes from the rule that could have been reduced
2615                                         rprec, rlevel = Productions[st_actionp[a].number].prec
2616
2617                                         if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2618                                             # We decide to shift here... highest precedence to shift
2619                                             Productions[st_actionp[a].number].reduced -= 1
2620                                             st_action[a] = j
2621                                             st_actionp[a] = p
2622                                             if not rlevel:
2623                                                 log.info('  ! shift/reduce conflict for %s resolved as shift', a)
2624                                                 self.sr_conflicts.append((st, a, 'shift'))
2625                                         elif (slevel == rlevel) and (rprec == 'nonassoc'):
2626                                             st_action[a] = None
2627                                         else:
2628                                             # Hmmm. Guess we'll keep the reduce
2629                                             if not slevel and not rlevel:
2630                                                 log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
2631                                                 self.sr_conflicts.append((st, a, 'reduce'))
2632
2633                                     else:
2634                                         raise LALRError('Unknown conflict in state %d' % st)
2635                                 else:
2636                                     st_action[a] = j
2637                                     st_actionp[a] = p
2638
2639             # Print the actions associated with each terminal
2640             _actprint = {}
2641             for a, p, m in actlist:
2642                 if a in st_action:
2643                     if p is st_actionp[a]:
2644                         log.info('    %-15s %s', a, m)
2645                         _actprint[(a, m)] = 1
2646             log.info('')
2647             # Print the actions that were not used. (debugging)
2648             not_used = 0
2649             for a, p, m in actlist:
2650                 if a in st_action:
2651                     if p is not st_actionp[a]:
2652                         if not (a, m) in _actprint:
2653                             log.debug('  ! %-15s [ %s ]', a, m)
2654                             not_used = 1
2655                             _actprint[(a, m)] = 1
2656             if not_used:
2657                 log.debug('')
2658
2659             # Construct the goto table for this state
2660
2661             nkeys = {}
2662             for ii in I:
2663                 for s in ii.usyms:
2664                     if s in self.grammar.Nonterminals:
2665                         nkeys[s] = None
2666             for n in nkeys:
2667                 g = self.lr0_goto(I, n)
2668                 j = self.lr0_cidhash.get(id(g), -1)
2669                 if j >= 0:
2670                     st_goto[n] = j
2671                     log.info('    %-30s shift and go to state %d', n, j)
2672
2673             action[st] = st_action
2674             actionp[st] = st_actionp
2675             goto[st] = st_goto
2676             st += 1
2677
2678     # -----------------------------------------------------------------------------
2679     # write()
2680     #
2681     # This function writes the LR parsing tables to a file
2682     # -----------------------------------------------------------------------------
2683
2684     def write_table(self, tabmodule, outputdir='', signature=''):
2685         if isinstance(tabmodule, types.ModuleType):
2686             raise IOError("Won't overwrite existing tabmodule")
2687
2688         basemodulename = tabmodule.split('.')[-1]
2689         filename = os.path.join(outputdir, basemodulename) + '.py'
2690         try:
2691             f = open(filename, 'w')
2692
2693             f.write('''
2694 # %s
2695 # This file is automatically generated. Do not edit.
2696 # pylint: disable=W,C,R
2697 _tabversion = %r
2698
2699 _lr_method = %r
2700
2701 _lr_signature = %r
2702     ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2703
2704             # Change smaller to 0 to go back to original tables
2705             smaller = 1
2706
2707             # Factor out names to try and make smaller
2708             if smaller:
2709                 items = {}
2710
2711                 for s, nd in self.lr_action.items():
2712                     for name, v in nd.items():
2713                         i = items.get(name)
2714                         if not i:
2715                             i = ([], [])
2716                             items[name] = i
2717                         i[0].append(s)
2718                         i[1].append(v)
2719
2720                 f.write('\n_lr_action_items = {')
2721                 for k, v in items.items():
2722                     f.write('%r:([' % k)
2723                     for i in v[0]:
2724                         f.write('%r,' % i)
2725                     f.write('],[')
2726                     for i in v[1]:
2727                         f.write('%r,' % i)
2728
2729                     f.write(']),')
2730                 f.write('}\n')
2731
2732                 f.write('''
2733 _lr_action = {}
2734 for _k, _v in _lr_action_items.items():
2735    for _x,_y in zip(_v[0],_v[1]):
2736       if not _x in _lr_action:  _lr_action[_x] = {}
2737       _lr_action[_x][_k] = _y
2738 del _lr_action_items
2739 ''')
2740
2741             else:
2742                 f.write('\n_lr_action = { ')
2743                 for k, v in self.lr_action.items():
2744                     f.write('(%r,%r):%r,' % (k[0], k[1], v))
2745                 f.write('}\n')
2746
2747             if smaller:
2748                 # Factor out names to try and make smaller
2749                 items = {}
2750
2751                 for s, nd in self.lr_goto.items():
2752                     for name, v in nd.items():
2753                         i = items.get(name)
2754                         if not i:
2755                             i = ([], [])
2756                             items[name] = i
2757                         i[0].append(s)
2758                         i[1].append(v)
2759
2760                 f.write('\n_lr_goto_items = {')
2761                 for k, v in items.items():
2762                     f.write('%r:([' % k)
2763                     for i in v[0]:
2764                         f.write('%r,' % i)
2765                     f.write('],[')
2766                     for i in v[1]:
2767                         f.write('%r,' % i)
2768
2769                     f.write(']),')
2770                 f.write('}\n')
2771
2772                 f.write('''
2773 _lr_goto = {}
2774 for _k, _v in _lr_goto_items.items():
2775    for _x, _y in zip(_v[0], _v[1]):
2776        if not _x in _lr_goto: _lr_goto[_x] = {}
2777        _lr_goto[_x][_k] = _y
2778 del _lr_goto_items
2779 ''')
2780             else:
2781                 f.write('\n_lr_goto = { ')
2782                 for k, v in self.lr_goto.items():
2783                     f.write('(%r,%r):%r,' % (k[0], k[1], v))
2784                 f.write('}\n')
2785
2786             # Write production table
2787             f.write('_lr_productions = [\n')
2788             for p in self.lr_productions:
2789                 if p.func:
2790                     f.write('  (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2791                                                           p.func, os.path.basename(p.file), p.line))
2792                 else:
2793                     f.write('  (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2794             f.write(']\n')
2795             f.close()
2796
2797         except IOError as e:
2798             raise
2799
2800
2801     # -----------------------------------------------------------------------------
2802     # pickle_table()
2803     #
2804     # This function pickles the LR parsing tables to a supplied file object
2805     # -----------------------------------------------------------------------------
2806
2807     def pickle_table(self, filename, signature=''):
2808         try:
2809             import cPickle as pickle
2810         except ImportError:
2811             import pickle
2812         with open(filename, 'wb') as outf:
2813             pickle.dump(__tabversion__, outf, pickle_protocol)
2814             pickle.dump(self.lr_method, outf, pickle_protocol)
2815             pickle.dump(signature, outf, pickle_protocol)
2816             pickle.dump(self.lr_action, outf, pickle_protocol)
2817             pickle.dump(self.lr_goto, outf, pickle_protocol)
2818
2819             outp = []
2820             for p in self.lr_productions:
2821                 if p.func:
2822                     outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2823                 else:
2824                     outp.append((str(p), p.name, p.len, None, None, None))
2825             pickle.dump(outp, outf, pickle_protocol)
2826
2827 # -----------------------------------------------------------------------------
2828 #                            === INTROSPECTION ===
2829 #
2830 # The following functions and classes are used to implement the PLY
2831 # introspection features followed by the yacc() function itself.
2832 # -----------------------------------------------------------------------------
2833
2834 # -----------------------------------------------------------------------------
2835 # get_caller_module_dict()
2836 #
2837 # This function returns a dictionary containing all of the symbols defined within
2838 # a caller further down the call stack.  This is used to get the environment
2839 # associated with the yacc() call if none was provided.
2840 # -----------------------------------------------------------------------------
2841
2842 def get_caller_module_dict(levels):
2843     f = sys._getframe(levels)
2844     ldict = f.f_globals.copy()
2845     if f.f_globals != f.f_locals:
2846         ldict.update(f.f_locals)
2847     return ldict
2848
2849 # -----------------------------------------------------------------------------
2850 # parse_grammar()
2851 #
2852 # This takes a raw grammar rule string and parses it into production data
2853 # -----------------------------------------------------------------------------
2854 def parse_grammar(doc, file, line):
2855     grammar = []
2856     # Split the doc string into lines
2857     pstrings = doc.splitlines()
2858     lastp = None
2859     dline = line
2860     for ps in pstrings:
2861         dline += 1
2862         p = ps.split()
2863         if not p:
2864             continue
2865         try:
2866             if p[0] == '|':
2867                 # This is a continuation of a previous rule
2868                 if not lastp:
2869                     raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2870                 prodname = lastp
2871                 syms = p[1:]
2872             else:
2873                 prodname = p[0]
2874                 lastp = prodname
2875                 syms   = p[2:]
2876                 assign = p[1]
2877                 if assign != ':' and assign != '::=':
2878                     raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2879
2880             grammar.append((file, dline, prodname, syms))
2881         except SyntaxError:
2882             raise
2883         except Exception:
2884             raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
2885
2886     return grammar
2887
2888 # -----------------------------------------------------------------------------
2889 # ParserReflect()
2890 #
2891 # This class represents information extracted for building a parser including
2892 # start symbol, error function, tokens, precedence list, action functions,
2893 # etc.
2894 # -----------------------------------------------------------------------------
2895 class ParserReflect(object):
2896     def __init__(self, pdict, log=None):
2897         self.pdict      = pdict
2898         self.start      = None
2899         self.error_func = None
2900         self.tokens     = None
2901         self.modules    = set()
2902         self.grammar    = []
2903         self.error      = False
2904
2905         if log is None:
2906             self.log = PlyLogger(sys.stderr)
2907         else:
2908             self.log = log
2909
2910     # Get all of the basic information
2911     def get_all(self):
2912         self.get_start()
2913         self.get_error_func()
2914         self.get_tokens()
2915         self.get_precedence()
2916         self.get_pfunctions()
2917
2918     # Validate all of the information
2919     def validate_all(self):
2920         self.validate_start()
2921         self.validate_error_func()
2922         self.validate_tokens()
2923         self.validate_precedence()
2924         self.validate_pfunctions()
2925         self.validate_modules()
2926         return self.error
2927
2928     # Compute a signature over the grammar
2929     def signature(self):
2930         parts = []
2931         try:
2932             if self.start:
2933                 parts.append(self.start)
2934             if self.prec:
2935                 parts.append(''.join([''.join(p) for p in self.prec]))
2936             if self.tokens:
2937                 parts.append(' '.join(self.tokens))
2938             for f in self.pfuncs:
2939                 if f[3]:
2940                     parts.append(f[3])
2941         except (TypeError, ValueError):
2942             pass
2943         return ''.join(parts)
2944
2945     # -----------------------------------------------------------------------------
2946     # validate_modules()
2947     #
2948     # This method checks to see if there are duplicated p_rulename() functions
2949     # in the parser module file.  Without this function, it is really easy for
2950     # users to make mistakes by cutting and pasting code fragments (and it's a real
2951     # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
2952     # we just do a little regular expression pattern matching of def statements
2953     # to try and detect duplicates.
2954     # -----------------------------------------------------------------------------
2955
2956     def validate_modules(self):
2957         # Match def p_funcname(
2958         fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2959
2960         for module in self.modules:
2961             try:
2962                 lines, linen = inspect.getsourcelines(module)
2963             except IOError:
2964                 continue
2965
2966             counthash = {}
2967             for linen, line in enumerate(lines):
2968                 linen += 1
2969                 m = fre.match(line)
2970                 if m:
2971                     name = m.group(1)
2972                     prev = counthash.get(name)
2973                     if not prev:
2974                         counthash[name] = linen
2975                     else:
2976                         filename = inspect.getsourcefile(module)
2977                         self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
2978                                          filename, linen, name, prev)
2979
2980     # Get the start symbol
2981     def get_start(self):
2982         self.start = self.pdict.get('start')
2983
2984     # Validate the start symbol
2985     def validate_start(self):
2986         if self.start is not None:
2987             if not isinstance(self.start, string_types):
2988                 self.log.error("'start' must be a string")
2989
2990     # Look for error handler
2991     def get_error_func(self):
2992         self.error_func = self.pdict.get('p_error')
2993
2994     # Validate the error function
2995     def validate_error_func(self):
2996         if self.error_func:
2997             if isinstance(self.error_func, types.FunctionType):
2998                 ismethod = 0
2999             elif isinstance(self.error_func, types.MethodType):
3000                 ismethod = 1
3001             else:
3002                 self.log.error("'p_error' defined, but is not a function or method")
3003                 self.error = True
3004                 return
3005
3006             eline = self.error_func.__code__.co_firstlineno
3007             efile = self.error_func.__code__.co_filename
3008             module = inspect.getmodule(self.error_func)
3009             self.modules.add(module)
3010
3011             argcount = self.error_func.__code__.co_argcount - ismethod
3012             if argcount != 1:
3013                 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3014                 self.error = True
3015
3016     # Get the tokens map
3017     def get_tokens(self):
3018         tokens = self.pdict.get('tokens')
3019         if not tokens:
3020             self.log.error('No token list is defined')
3021             self.error = True
3022             return
3023
3024         if not isinstance(tokens, (list, tuple)):
3025             self.log.error('tokens must be a list or tuple')
3026             self.error = True
3027             return
3028
3029         if not tokens:
3030             self.log.error('tokens is empty')
3031             self.error = True
3032             return
3033
3034         self.tokens = sorted(tokens)
3035
3036     # Validate the tokens
3037     def validate_tokens(self):
3038         # Validate the tokens.
3039         if 'error' in self.tokens:
3040             self.log.error("Illegal token name 'error'. Is a reserved word")
3041             self.error = True
3042             return
3043
3044         terminals = set()
3045         for n in self.tokens:
3046             if n in terminals:
3047                 self.log.warning('Token %r multiply defined', n)
3048             terminals.add(n)
3049
3050     # Get the precedence map (if any)
3051     def get_precedence(self):
3052         self.prec = self.pdict.get('precedence')
3053
3054     # Validate and parse the precedence map
3055     def validate_precedence(self):
3056         preclist = []
3057         if self.prec:
3058             if not isinstance(self.prec, (list, tuple)):
3059                 self.log.error('precedence must be a list or tuple')
3060                 self.error = True
3061                 return
3062             for level, p in enumerate(self.prec):
3063                 if not isinstance(p, (list, tuple)):
3064                     self.log.error('Bad precedence table')
3065                     self.error = True
3066                     return
3067
3068                 if len(p) < 2:
3069                     self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3070                     self.error = True
3071                     return
3072                 assoc = p[0]
3073                 if not isinstance(assoc, string_types):
3074                     self.log.error('precedence associativity must be a string')
3075                     self.error = True
3076                     return
3077                 for term in p[1:]:
3078                     if not isinstance(term, string_types):
3079                         self.log.error('precedence items must be strings')
3080                         self.error = True
3081                         return
3082                     preclist.append((term, assoc, level+1))
3083         self.preclist = preclist
3084
3085     # Get all p_functions from the grammar
3086     def get_pfunctions(self):
3087         p_functions = []
3088         for name, item in self.pdict.items():
3089             if not name.startswith('p_') or name == 'p_error':
3090                 continue
3091             if isinstance(item, (types.FunctionType, types.MethodType)):
3092                 line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
3093                 module = inspect.getmodule(item)
3094                 p_functions.append((line, module, name, item.__doc__))
3095
3096         # Sort all of the actions by line number; make sure to stringify
3097         # modules to make them sortable, since `line` may not uniquely sort all
3098         # p functions
3099         p_functions.sort(key=lambda p_function: (
3100             p_function[0],
3101             str(p_function[1]),
3102             p_function[2],
3103             p_function[3]))
3104         self.pfuncs = p_functions
3105
3106     # Validate all of the p_functions
3107     def validate_pfunctions(self):
3108         grammar = []
3109         # Check for non-empty symbols
3110         if len(self.pfuncs) == 0:
3111             self.log.error('no rules of the form p_rulename are defined')
3112             self.error = True
3113             return
3114
3115         for line, module, name, doc in self.pfuncs:
3116             file = inspect.getsourcefile(module)
3117             func = self.pdict[name]
3118             if isinstance(func, types.MethodType):
3119                 reqargs = 2
3120             else:
3121                 reqargs = 1
3122             if func.__code__.co_argcount > reqargs:
3123                 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3124                 self.error = True
3125             elif func.__code__.co_argcount < reqargs:
3126                 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3127                 self.error = True
3128             elif not func.__doc__:
3129                 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3130                                  file, line, func.__name__)
3131             else:
3132                 try:
3133                     parsed_g = parse_grammar(doc, file, line)
3134                     for g in parsed_g:
3135                         grammar.append((name, g))
3136                 except SyntaxError as e:
3137                     self.log.error(str(e))
3138                     self.error = True
3139
3140                 # Looks like a valid grammar rule
3141                 # Mark the file in which defined.
3142                 self.modules.add(module)
3143
3144         # Secondary validation step that looks for p_ definitions that are not functions
3145         # or functions that look like they might be grammar rules.
3146
3147         for n, v in self.pdict.items():
3148             if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3149                 continue
3150             if n.startswith('t_'):
3151                 continue
3152             if n.startswith('p_') and n != 'p_error':
3153                 self.log.warning('%r not defined as a function', n)
3154             if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3155                    (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3156                 if v.__doc__:
3157                     try:
3158                         doc = v.__doc__.split(' ')
3159                         if doc[1] == ':':
3160                             self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3161                                              v.__code__.co_filename, v.__code__.co_firstlineno, n)
3162                     except IndexError:
3163                         pass
3164
3165         self.grammar = grammar
3166
3167 # -----------------------------------------------------------------------------
3168 # yacc(module)
3169 #
3170 # Build a parser
3171 # -----------------------------------------------------------------------------
3172
3173 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3174          check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3175          outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3176
3177     if tabmodule is None:
3178         tabmodule = tab_module
3179
3180     # Reference to the parsing method of the last built parser
3181     global parse
3182
3183     # If pickling is enabled, table files are not created
3184     if picklefile:
3185         write_tables = 0
3186
3187     if errorlog is None:
3188         errorlog = PlyLogger(sys.stderr)
3189
3190     # Get the module dictionary used for the parser
3191     if module:
3192         _items = [(k, getattr(module, k)) for k in dir(module)]
3193         pdict = dict(_items)
3194         # If no __file__ or __package__ attributes are available, try to obtain them
3195         # from the __module__ instead
3196         if '__file__' not in pdict:
3197             pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3198         if '__package__' not in pdict and '__module__' in pdict:
3199             if hasattr(sys.modules[pdict['__module__']], '__package__'):
3200                 pdict['__package__'] = sys.modules[pdict['__module__']].__package__
3201     else:
3202         pdict = get_caller_module_dict(2)
3203
3204     if outputdir is None:
3205         # If no output directory is set, the location of the output files
3206         # is determined according to the following rules:
3207         #     - If tabmodule specifies a package, files go into that package directory
3208         #     - Otherwise, files go in the same directory as the specifying module
3209         if isinstance(tabmodule, types.ModuleType):
3210             srcfile = tabmodule.__file__
3211         else:
3212             if '.' not in tabmodule:
3213                 srcfile = pdict['__file__']
3214             else:
3215                 parts = tabmodule.split('.')
3216                 pkgname = '.'.join(parts[:-1])
3217                 exec('import %s' % pkgname)
3218                 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3219         outputdir = os.path.dirname(srcfile)
3220
3221     # Determine if the module is package of a package or not.
3222     # If so, fix the tabmodule setting so that tables load correctly
3223     pkg = pdict.get('__package__')
3224     if pkg and isinstance(tabmodule, str):
3225         if '.' not in tabmodule:
3226             tabmodule = pkg + '.' + tabmodule
3227
3228
3229
3230     # Set start symbol if it's specified directly using an argument
3231     if start is not None:
3232         pdict['start'] = start
3233
3234     # Collect parser information from the dictionary
3235     pinfo = ParserReflect(pdict, log=errorlog)
3236     pinfo.get_all()
3237
3238     if pinfo.error:
3239         raise YaccError('Unable to build parser')
3240
3241     # Check signature against table files (if any)
3242     signature = pinfo.signature()
3243
3244     # Read the tables
3245     try:
3246         lr = LRTable()
3247         if picklefile:
3248             read_signature = lr.read_pickle(picklefile)
3249         else:
3250             read_signature = lr.read_table(tabmodule)
3251         if optimize or (read_signature == signature):
3252             try:
3253                 lr.bind_callables(pinfo.pdict)
3254                 parser = LRParser(lr, pinfo.error_func)
3255                 parse = parser.parse
3256                 return parser
3257             except Exception as e:
3258                 errorlog.warning('There was a problem loading the table file: %r', e)
3259     except VersionError as e:
3260         errorlog.warning(str(e))
3261     except ImportError:
3262         pass
3263
3264     if debuglog is None:
3265         if debug:
3266             try:
3267                 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3268             except IOError as e:
3269                 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3270                 debuglog = NullLogger()
3271         else:
3272             debuglog = NullLogger()
3273
3274     debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3275
3276     errors = False
3277
3278     # Validate the parser information
3279     if pinfo.validate_all():
3280         raise YaccError('Unable to build parser')
3281
3282     if not pinfo.error_func:
3283         errorlog.warning('no p_error() function is defined')
3284
3285     # Create a grammar object
3286     grammar = Grammar(pinfo.tokens)
3287
3288     # Set precedence level for terminals
3289     for term, assoc, level in pinfo.preclist:
3290         try:
3291             grammar.set_precedence(term, assoc, level)
3292         except GrammarError as e:
3293             errorlog.warning('%s', e)
3294
3295     # Add productions to the grammar
3296     for funcname, gram in pinfo.grammar:
3297         file, line, prodname, syms = gram
3298         try:
3299             grammar.add_production(prodname, syms, funcname, file, line)
3300         except GrammarError as e:
3301             errorlog.error('%s', e)
3302             errors = True
3303
3304     # Set the grammar start symbols
3305     try:
3306         if start is None:
3307             grammar.set_start(pinfo.start)
3308         else:
3309             grammar.set_start(start)
3310     except GrammarError as e:
3311         errorlog.error(str(e))
3312         errors = True
3313
3314     if errors:
3315         raise YaccError('Unable to build parser')
3316
3317     # Verify the grammar structure
3318     undefined_symbols = grammar.undefined_symbols()
3319     for sym, prod in undefined_symbols:
3320         errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3321         errors = True
3322
3323     unused_terminals = grammar.unused_terminals()
3324     if unused_terminals:
3325         debuglog.info('')
3326         debuglog.info('Unused terminals:')
3327         debuglog.info('')
3328         for term in unused_terminals:
3329             errorlog.warning('Token %r defined, but not used', term)
3330             debuglog.info('    %s', term)
3331
3332     # Print out all productions to the debug log
3333     if debug:
3334         debuglog.info('')
3335         debuglog.info('Grammar')
3336         debuglog.info('')
3337         for n, p in enumerate(grammar.Productions):
3338             debuglog.info('Rule %-5d %s', n, p)
3339
3340     # Find unused non-terminals
3341     unused_rules = grammar.unused_rules()
3342     for prod in unused_rules:
3343         errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3344
3345     if len(unused_terminals) == 1:
3346         errorlog.warning('There is 1 unused token')
3347     if len(unused_terminals) > 1:
3348         errorlog.warning('There are %d unused tokens', len(unused_terminals))
3349
3350     if len(unused_rules) == 1:
3351         errorlog.warning('There is 1 unused rule')
3352     if len(unused_rules) > 1:
3353         errorlog.warning('There are %d unused rules', len(unused_rules))
3354
3355     if debug:
3356         debuglog.info('')
3357         debuglog.info('Terminals, with rules where they appear')
3358         debuglog.info('')
3359         terms = list(grammar.Terminals)
3360         terms.sort()
3361         for term in terms:
3362             debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3363
3364         debuglog.info('')
3365         debuglog.info('Nonterminals, with rules where they appear')
3366         debuglog.info('')
3367         nonterms = list(grammar.Nonterminals)
3368         nonterms.sort()
3369         for nonterm in nonterms:
3370             debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3371         debuglog.info('')
3372
3373     if check_recursion:
3374         unreachable = grammar.find_unreachable()
3375         for u in unreachable:
3376             errorlog.warning('Symbol %r is unreachable', u)
3377
3378         infinite = grammar.infinite_cycles()
3379         for inf in infinite:
3380             errorlog.error('Infinite recursion detected for symbol %r', inf)
3381             errors = True
3382
3383     unused_prec = grammar.unused_precedence()
3384     for term, assoc in unused_prec:
3385         errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3386         errors = True
3387
3388     if errors:
3389         raise YaccError('Unable to build parser')
3390
3391     # Run the LRGeneratedTable on the grammar
3392     if debug:
3393         errorlog.debug('Generating %s tables', method)
3394
3395     lr = LRGeneratedTable(grammar, method, debuglog)
3396
3397     if debug:
3398         num_sr = len(lr.sr_conflicts)
3399
3400         # Report shift/reduce and reduce/reduce conflicts
3401         if num_sr == 1:
3402             errorlog.warning('1 shift/reduce conflict')
3403         elif num_sr > 1:
3404             errorlog.warning('%d shift/reduce conflicts', num_sr)
3405
3406         num_rr = len(lr.rr_conflicts)
3407         if num_rr == 1:
3408             errorlog.warning('1 reduce/reduce conflict')
3409         elif num_rr > 1:
3410             errorlog.warning('%d reduce/reduce conflicts', num_rr)
3411
3412     # Write out conflicts to the output file
3413     if debug and (lr.sr_conflicts or lr.rr_conflicts):
3414         debuglog.warning('')
3415         debuglog.warning('Conflicts:')
3416         debuglog.warning('')
3417
3418         for state, tok, resolution in lr.sr_conflicts:
3419             debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s',  tok, state, resolution)
3420
3421         already_reported = set()
3422         for state, rule, rejected in lr.rr_conflicts:
3423             if (state, id(rule), id(rejected)) in already_reported:
3424                 continue
3425             debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3426             debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3427             errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3428             errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3429             already_reported.add((state, id(rule), id(rejected)))
3430
3431         warned_never = []
3432         for state, rule, rejected in lr.rr_conflicts:
3433             if not rejected.reduced and (rejected not in warned_never):
3434                 debuglog.warning('Rule (%s) is never reduced', rejected)
3435                 errorlog.warning('Rule (%s) is never reduced', rejected)
3436                 warned_never.append(rejected)
3437
3438     # Write the table file if requested
3439     if write_tables:
3440         try:
3441             lr.write_table(tabmodule, outputdir, signature)
3442             if tabmodule in sys.modules:
3443                 del sys.modules[tabmodule]
3444         except IOError as e:
3445             errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3446
3447     # Write a pickled version of the tables
3448     if picklefile:
3449         try:
3450             lr.pickle_table(picklefile, signature)
3451         except IOError as e:
3452             errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3453
3454     # Build the parser
3455     lr.bind_callables(pinfo.pdict)
3456     parser = LRParser(lr, pinfo.error_func)
3457
3458     parse = parser.parse
3459     return parser