# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#
# :::::::: WARNING ::::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might
# be considered good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import inspect
import warnings

__version__    = '3.11'
__tabversion__ = '3.10'

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = True             # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = False            # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug
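
# A minimal usage sketch (the file name and logger setup are hypothetical,
# though yacc.yacc() does accept `errorlog` and `debuglog` keyword
# arguments): instead of the default PlyLogger(sys.stderr), a user can
# route PLY's diagnostics through the standard logging module.
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debuglog=log, errorlog=log)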

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))

# Panic mode error recovery support.  This feature is being reworked--much of the
# code here is to offer a deprecation/backwards-compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r
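
# An illustrative sketch of the preferred, non-deprecated style (the 'SEMI'
# token and the surrounding grammar are hypothetical): a p_error() that
# resynchronizes on a semicolon using the parser instance methods saved above.
#
#     def p_error(tok):
#         # Read ahead looking for a terminating ';'
#         while True:
#             tok = parser.token()          # Get the next token
#             if not tok or tok.type == 'SEMI':
#                 break
#         parser.errok()                    # Resume normal parsing
#
#     parser = yacc.yacc()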

#-----------------------------------------------------------------------------
#                        ===  LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def set_lexpos(self, n, lexpos):
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
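
# A usage sketch (hypothetical rule from an expression grammar): inside a
# grammar action, p is a YaccProduction, so p[n] reads the .value of the
# n-th symbol of the rule and assigning p[0] sets the result, while
# lineno()/linespan()/lexspan() expose position information.
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#         start, end = p.linespan(2)    # line range of the PLUS token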

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
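
    # A calling sketch (the input string is hypothetical): debug=True routes
    # through parsedebug(), tracking=True through parseopt(), and the default
    # through parseopt_notrack().
    #
    #     parser = yacc.yacc()
    #     result = parser.parse('3 + 4 * 5')              # fastest path
    #     result = parser.parse(data, tracking=True)      # with line tracking
    #     result = parser.parse(data, debug=True)         # verbose trace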

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.  Optimized versions of this function
    # are automatically created by the ply/ygen.py script.  This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parsedebug-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser's token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-notrack-end

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = self.Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal stand-in for Production objects when
# reading table data from files.  It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here
# are the basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
#       number     - Production number.
#
#       lr_next    - Next LR item. For example, if we are at 'expr -> expr . PLUS term',
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
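
# A small worked example (the symbols are hypothetical): for the rule body
# ['expr', 'PLUS', 'term'] with terminals PLUS and NUMBER, the scan runs
# right-to-left and returns 'PLUS', since 'term' is a nonterminal here.
#
#     rightmost_terminal(['expr', 'PLUS', 'term'], {'PLUS': [], 'NUMBER': []})
#     # -> 'PLUS'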

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]  # A list of all of the productions.  The first
                                   # entry is always reserved for the purpose of
                                   # building an augmented grammar

        self.Prodnames = {}        # A dictionary mapping the names of nonterminals to a list of all
                                   # productions of that nonterminal.

        self.Prodmap = {}          # A dictionary that is only used to detect duplicate
                                   # productions.

        self.Terminals = {}        # A dictionary mapping the names of terminal symbols to a
                                   # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}     # A dictionary mapping names of nonterminals to a list
                                   # of rule numbers where they are used.

        self.First = {}            # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}           # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}       # Precedence rules for each terminal. Contains tuples of the
                                   # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None          # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
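
    # A usage sketch (the grammar object and token names are hypothetical):
    # precedence must be declared before any productions are added, mirroring
    # a yacc specification such as "%left PLUS MINUS" followed by "%left TIMES".
    #
    #     g = Grammar(['PLUS', 'MINUS', 'TIMES', 'NUMBER'])
    #     g.set_precedence('PLUS',  'left', 1)
    #     g.set_precedence('MINUS', 'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)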

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.  For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the rightmost terminal symbol
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
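
    # A usage sketch continuing the hypothetical grammar above: each call
    # adds one rule; single-character literal tokens (e.g. "'+'") are also
    # accepted and are registered as terminals on the fly.
    #
    #     g.add_production('expr', ['expr', 'PLUS', 'term'])
    #     g.add_production('expr', ['term'])
    #     g.add_production('term', ['NUMBER'])
    #     g.set_start('expr')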

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite

    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    result.append((s, p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of all such symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable).
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term, precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname, self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first(), the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self, beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First
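
    # A worked sketch for the hypothetical grammar built above
    # (expr -> expr PLUS term | term, term -> NUMBER): compute_first()
    # iterates to the fixpoint
    #
    #     First['NUMBER'] = ['NUMBER']
    #     First['term']   = ['NUMBER']      # from term -> NUMBER
    #     First['expr']   = ['NUMBER']      # from expr -> term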
1825 # ---------------------------------------------------------------------
1826 # compute_follow()
1828 # Computes all of the follow sets for every non-terminal symbol. The
1829 # follow set is the set of all symbols that might follow a given
1830 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1831 # ---------------------------------------------------------------------
1832 def compute_follow(self, start=None):
1833 # If already computed, return the result
1834 if self.Follow:
1835 return self.Follow
1837 # If first sets not computed yet, do that first.
1838 if not self.First:
1839 self.compute_first()
1841 # Add '$end' to the follow list of the start symbol
1842 for k in self.Nonterminals:
1843 self.Follow[k] = []
1845 if not start:
1846 start = self.Productions[1].name
1848 self.Follow[start] = ['$end']
1850 while True:
1851 didadd = False
1852 for p in self.Productions[1:]:
1853 # Scan the right-hand side of this production
1854 for i, B in enumerate(p.prod):
1855 if B in self.Nonterminals:
1856 # Okay. We got a non-terminal in a production
1857 fst = self._first(p.prod[i+1:])
1858 hasempty = False
1859 for f in fst:
1860 if f != '<empty>' and f not in self.Follow[B]:
1861 self.Follow[B].append(f)
1862 didadd = True
1863 if f == '<empty>':
1864 hasempty = True
1865 if hasempty or i == (len(p.prod)-1):
1866 # Add elements of Follow(p.name) to Follow(B)
1867 for f in self.Follow[p.name]:
1868 if f not in self.Follow[B]:
1869 self.Follow[B].append(f)
1870 didadd = True
1871 if not didadd:
1872 break
1873 return self.Follow
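# Illustrative sketch (not part of PLY): for the toy grammar
#     S -> LPAREN S RPAREN | <empty>
# compute_follow() yields Follow[S] = ['$end', 'RPAREN']: '$end' because
# S is the start symbol, and 'RPAREN' because every inner S is followed
# by RPAREN.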
1876 # -----------------------------------------------------------------------------
1877 # build_lritems()
1879 # This function walks the list of productions and builds a complete set of the
1880 # LR items. The LR items are stored in two ways: each production keeps a
1881 # list of its own LR items in p.lr_items, and the items of a production
1882 # are chained together through their lr_next links. For example:
1884 # E -> E PLUS E
1886 # Creates the list
1888 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1889 # -----------------------------------------------------------------------------
1891 def build_lritems(self):
1892 for p in self.Productions:
1893 lastlri = p
1894 i = 0
1895 lr_items = []
1896 while True:
1897 if i > len(p):
1898 lri = None
1899 else:
1900 lri = LRItem(p, i)
1901 # Precompute the list of productions immediately following
1902 try:
1903 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1904 except (IndexError, KeyError):
1905 lri.lr_after = []
1906 try:
1907 lri.lr_before = lri.prod[i-1]
1908 except IndexError:
1909 lri.lr_before = None
1911 lastlri.lr_next = lri
1912 if not lri:
1913 break
1914 lr_items.append(lri)
1915 lastlri = lri
1916 i += 1
1917 p.lr_items = lr_items
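# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of PLY). Normally yacc() below drives
# the Grammar class; this hedged helper, never called by PLY itself, shows
# the same calls on a tiny expression grammar:
def _grammar_demo():
    g = Grammar(['NUM', 'PLUS'])                                  # terminals
    g.add_production('expr', ['expr', 'PLUS', 'term'], None, '<demo>', 1)
    g.add_production('expr', ['term'], None, '<demo>', 2)
    g.add_production('term', ['NUM'], None, '<demo>', 3)
    g.set_start('expr')
    g.build_lritems()                  # attach the dotted LR items
    return g.compute_first(), g.compute_follow()
# ---------------------------------------------------------------------------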
1919 # -----------------------------------------------------------------------------
1920 # == Class LRTable ==
1922 # This class represents a basic table of LR parsing information.
1923 # Methods for generating the tables are not defined here. They are defined
1924 # in the derived class LRGeneratedTable.
1925 # -----------------------------------------------------------------------------
1927 class VersionError(YaccError):
1928 pass
1930 class LRTable(object):
1931 def __init__(self):
1932 self.lr_action = None
1933 self.lr_goto = None
1934 self.lr_productions = None
1935 self.lr_method = None
1937 def read_table(self, module):
1938 if isinstance(module, types.ModuleType):
1939 parsetab = module
1940 else:
1941 exec('import %s' % module)
1942 parsetab = sys.modules[module]
1944 if parsetab._tabversion != __tabversion__:
1945 raise VersionError('yacc table file version is out of date')
1947 self.lr_action = parsetab._lr_action
1948 self.lr_goto = parsetab._lr_goto
1950 self.lr_productions = []
1951 for p in parsetab._lr_productions:
1952 self.lr_productions.append(MiniProduction(*p))
1954 self.lr_method = parsetab._lr_method
1955 return parsetab._lr_signature
1957 def read_pickle(self, filename):
1958 try:
1959 import cPickle as pickle
1960 except ImportError:
1961 import pickle
1963 if not os.path.exists(filename):
1964 raise ImportError
1966 in_f = open(filename, 'rb')
1968 tabversion = pickle.load(in_f)
1969 if tabversion != __tabversion__:
1970 raise VersionError('yacc table file version is out of date')
1971 self.lr_method = pickle.load(in_f)
1972 signature = pickle.load(in_f)
1973 self.lr_action = pickle.load(in_f)
1974 self.lr_goto = pickle.load(in_f)
1975 productions = pickle.load(in_f)
1977 self.lr_productions = []
1978 for p in productions:
1979 self.lr_productions.append(MiniProduction(*p))
1981 in_f.close()
1982 return signature
1984 # Bind all production function names to callable objects in pdict
1985 def bind_callables(self, pdict):
1986 for p in self.lr_productions:
1987 p.bind(pdict)
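# Illustrative note (not part of PLY): a previously generated table can be
# reloaded without regenerating it, which is exactly what yacc() below
# attempts first.  With pdict and error_func as in yacc():
#     lr = LRTable()
#     signature = lr.read_table('parsetab')   # or lr.read_pickle(filename)
#     lr.bind_callables(pdict)
#     parser = LRParser(lr, error_func)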
1990 # -----------------------------------------------------------------------------
1991 # === LR Generator ===
1993 # The following classes and functions are used to generate LR parsing tables on
1994 # a grammar.
1995 # -----------------------------------------------------------------------------
1997 # -----------------------------------------------------------------------------
1998 # digraph()
1999 # traverse()
2001 # The following two functions are used to compute set valued functions
2002 # of the form:
2004 # F(x) = F'(x) U U{F(y) | x R y}
2006 # This is used to compute the values of Read() sets as well as FOLLOW sets
2007 # in LALR(1) generation.
2009 # Inputs: X - An input set
2010 # R - A relation
2011 # FP - Set-valued function
2012 # ------------------------------------------------------------------------------
2014 def digraph(X, R, FP):
2015 N = {}
2016 for x in X:
2017 N[x] = 0
2018 stack = []
2019 F = {}
2020 for x in X:
2021 if N[x] == 0:
2022 traverse(x, N, stack, F, X, R, FP)
2023 return F
2025 def traverse(x, N, stack, F, X, R, FP):
2026 stack.append(x)
2027 d = len(stack)
2028 N[x] = d
2029 F[x] = FP(x) # F(x) <- F'(x)
2031 rel = R(x) # Get y's related to x
2032 for y in rel:
2033 if N[y] == 0:
2034 traverse(y, N, stack, F, X, R, FP)
2035 N[x] = min(N[x], N[y])
2036 for a in F.get(y, []):
2037 if a not in F[x]:
2038 F[x].append(a)
2039 if N[x] == d:
2040 N[stack[-1]] = MAXINT
2041 F[stack[-1]] = F[x]
2042 element = stack.pop()
2043 while element != x:
2044 N[stack[-1]] = MAXINT
2045 F[stack[-1]] = F[x]
2046 element = stack.pop()
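# Illustrative sketch (not part of PLY): a hedged toy use of digraph(),
# never called by PLY itself.  With the chain relation a R b R c and
# F'(x) = {x.upper()}, each F(x) accumulates the primed sets of everything
# reachable from x:
def _digraph_demo():
    X = ['a', 'b', 'c']
    R = lambda x: {'a': ['b'], 'b': ['c'], 'c': []}[x]
    FP = lambda x: [x.upper()]
    return digraph(X, R, FP)
    # -> {'a': ['A', 'B', 'C'], 'b': ['B', 'C'], 'c': ['C']}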
2048 class LALRError(YaccError):
2049 pass
2051 # -----------------------------------------------------------------------------
2052 # == LRGeneratedTable ==
2054 # This class implements the LR table generation algorithm. There are no
2055 # public methods except for write_table() and pickle_table()
2056 # -----------------------------------------------------------------------------
2058 class LRGeneratedTable(LRTable):
2059 def __init__(self, grammar, method='LALR', log=None):
2060 if method not in ['SLR', 'LALR']:
2061 raise LALRError('Unsupported method %s' % method)
2063 self.grammar = grammar
2064 self.lr_method = method
2066 # Set up the logger
2067 if not log:
2068 log = NullLogger()
2069 self.log = log
2071 # Internal attributes
2072 self.lr_action = {} # Action table
2073 self.lr_goto = {} # Goto table
2074 self.lr_productions = grammar.Productions # Copy of grammar Production array
2075 self.lr_goto_cache = {} # Cache of computed gotos
2076 self.lr0_cidhash = {} # Cache of closures
2078 self._add_count = 0 # Internal counter used to detect cycles
2080 # Diagnostic information filled in by the table generator
2081 self.sr_conflict = 0
2082 self.rr_conflict = 0
2083 self.conflicts = [] # List of conflicts
2085 self.sr_conflicts = []
2086 self.rr_conflicts = []
2088 # Build the tables
2089 self.grammar.build_lritems()
2090 self.grammar.compute_first()
2091 self.grammar.compute_follow()
2092 self.lr_parse_table()
2094 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2096 def lr0_closure(self, I):
2097 self._add_count += 1
2099 # Add everything in I to J
2100 J = I[:]
2101 didadd = True
2102 while didadd:
2103 didadd = False
2104 for j in J:
2105 for x in j.lr_after:
2106 if getattr(x, 'lr0_added', 0) == self._add_count:
2107 continue
2108 # Add B --> .G to J
2109 J.append(x.lr_next)
2110 x.lr0_added = self._add_count
2111 didadd = True
2113 return J
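# Illustrative sketch (not part of PLY): starting from the augmented item
#     S' -> . expr
# lr0_closure() pulls in expr -> . expr PLUS term, expr -> . term and
# term -> . NUM, i.e. every production whose left-hand side appears
# immediately after a dot, repeated until nothing new is added.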
2115 # Compute the LR(0) goto function goto(I,X) where I is a set
2116 # of LR(0) items and X is a grammar symbol. This function is written
2117 # in a way that guarantees uniqueness of the generated goto sets
2118 # (i.e. the same goto set will never be returned as two different Python
2119 # objects). With uniqueness, we can later do fast set comparisons using
2120 # id(obj) instead of element-wise comparison.
2122 def lr0_goto(self, I, x):
2123 # First we look for a previously cached entry
2124 g = self.lr_goto_cache.get((id(I), x))
2125 if g:
2126 return g
2128 # Now we generate the goto set in a way that guarantees uniqueness
2129 # of the result
2131 s = self.lr_goto_cache.get(x)
2132 if not s:
2133 s = {}
2134 self.lr_goto_cache[x] = s
2136 gs = []
2137 for p in I:
2138 n = p.lr_next
2139 if n and n.lr_before == x:
2140 s1 = s.get(id(n))
2141 if not s1:
2142 s1 = {}
2143 s[id(n)] = s1
2144 gs.append(n)
2145 s = s1
2146 g = s.get('$end')
2147 if not g:
2148 if gs:
2149 g = self.lr0_closure(gs)
2150 s['$end'] = g
2151 else:
2152 s['$end'] = gs
2153 self.lr_goto_cache[(id(I), x)] = g
2154 return g
2156 # Compute the LR(0) sets-of-items function
2157 def lr0_items(self):
2158 C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2159 i = 0
2160 for I in C:
2161 self.lr0_cidhash[id(I)] = i
2162 i += 1
2164 # Loop over the items in C and each grammar symbol
2165 i = 0
2166 while i < len(C):
2167 I = C[i]
2168 i += 1
2170 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2171 asyms = {}
2172 for ii in I:
2173 for s in ii.usyms:
2174 asyms[s] = None
2176 for x in asyms:
2177 g = self.lr0_goto(I, x)
2178 if not g or id(g) in self.lr0_cidhash:
2179 continue
2180 self.lr0_cidhash[id(g)] = len(C)
2181 C.append(g)
2183 return C
2185 # -----------------------------------------------------------------------------
2186 # ==== LALR(1) Parsing ====
2188 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2189 # relying upon Follow() sets when performing reductions, a more selective
2190 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2191 # Thus, we mainly just have to focus on calculating the lookahead sets.
2193 # The method used here is due to DeRemer and Pennello (1982).
2195 # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2196 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2197 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2199 # Further details can also be found in:
2201 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2202 # McGraw-Hill Book Company, (1985).
2204 # -----------------------------------------------------------------------------
2206 # -----------------------------------------------------------------------------
2207 # compute_nullable_nonterminals()
2209 # Creates a dictionary containing all of the non-terminals that might produce
2210 # an empty production.
2211 # -----------------------------------------------------------------------------
2213 def compute_nullable_nonterminals(self):
2214 nullable = set()
2215 num_nullable = 0
2216 while True:
2217 for p in self.grammar.Productions[1:]:
2218 if p.len == 0:
2219 nullable.add(p.name)
2220 continue
2221 for t in p.prod:
2222 if t not in nullable:
2223 break
2224 else:
2225 nullable.add(p.name)
2226 if len(nullable) == num_nullable:
2227 break
2228 num_nullable = len(nullable)
2229 return nullable
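# Illustrative sketch (not part of PLY): with the productions
#     opt :                 pair : opt opt          list : pair NUM
# successive passes add 'opt' (an empty production) and then 'pair'
# (both of its symbols are now nullable); 'list' never qualifies because
# NUM is a terminal.  Result: {'opt', 'pair'}.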
2231 # -----------------------------------------------------------------------------
2232 # find_nonterminal_trans(C)
2234 # Given a set of LR(0) items, this function finds all of the non-terminal
2235 # transitions. These are transitions in which a dot appears immediately before
2236 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2237 # is the state number and N is the nonterminal symbol.
2239 # The input C is the set of LR(0) items.
2240 # -----------------------------------------------------------------------------
2242 def find_nonterminal_transitions(self, C):
2243 trans = []
2244 for stateno, state in enumerate(C):
2245 for p in state:
2246 if p.lr_index < p.len - 1:
2247 t = (stateno, p.prod[p.lr_index+1])
2248 if t[1] in self.grammar.Nonterminals:
2249 if t not in trans:
2250 trans.append(t)
2251 return trans
2253 # -----------------------------------------------------------------------------
2254 # dr_relation()
2256 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2257 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2259 # Returns a list of terminals.
2260 # -----------------------------------------------------------------------------
2262 def dr_relation(self, C, trans, nullable):
2263 state, N = trans
2264 terms = []
2266 g = self.lr0_goto(C[state], N)
2267 for p in g:
2268 if p.lr_index < p.len - 1:
2269 a = p.prod[p.lr_index+1]
2270 if a in self.grammar.Terminals:
2271 if a not in terms:
2272 terms.append(a)
2274 # This extra bit is to handle the start state
2275 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2276 terms.append('$end')
2278 return terms
2280 # -----------------------------------------------------------------------------
2281 # reads_relation()
2283 # Computes the READS() relation (p,A) READS (t,C).
2284 # -----------------------------------------------------------------------------
2286 def reads_relation(self, C, trans, empty):
2287 # Look for empty transitions
2288 rel = []
2289 state, N = trans
2291 g = self.lr0_goto(C[state], N)
2292 j = self.lr0_cidhash.get(id(g), -1)
2293 for p in g:
2294 if p.lr_index < p.len - 1:
2295 a = p.prod[p.lr_index + 1]
2296 if a in empty:
2297 rel.append((j, a))
2299 return rel
2301 # -----------------------------------------------------------------------------
2302 # compute_lookback_includes()
2304 # Determines the lookback and includes relations
2306 # LOOKBACK:
2308 # This relation is determined by running the LR(0) state machine forward.
2309 # For example, starting with a production "N : . A B C", we run it forward
2310 # to obtain "N : A B C ." We then build a relationship between this final
2311 # state and the starting state. These relationships are stored in a dictionary
2312 # lookdict.
2314 # INCLUDES:
2316 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2318 # This relation is used to determine non-terminal transitions that occur
2319 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2320 # if the following holds:
2322 # B -> LAT, where T -> epsilon and p' -L-> p
2324 # L is essentially a prefix (which may be empty), T is a suffix that must be
2325 # able to derive an empty string. State p' must lead to state p with the string L.
2327 # -----------------------------------------------------------------------------
2329 def compute_lookback_includes(self, C, trans, nullable):
2330 lookdict = {} # Dictionary of lookback relations
2331 includedict = {} # Dictionary of include relations
2333 # Make a dictionary of non-terminal transitions
2334 dtrans = {}
2335 for t in trans:
2336 dtrans[t] = 1
2338 # Loop over all transitions and compute lookbacks and includes
2339 for state, N in trans:
2340 lookb = []
2341 includes = []
2342 for p in C[state]:
2343 if p.name != N:
2344 continue
2346 # Okay, we have a name match. We now follow the production all the way
2347 # through the state machine until we get the . on the right hand side
2349 lr_index = p.lr_index
2350 j = state
2351 while lr_index < p.len - 1:
2352 lr_index = lr_index + 1
2353 t = p.prod[lr_index]
2355 # Check to see if this symbol and state are a non-terminal transition
2356 if (j, t) in dtrans:
2357 # Yes. Okay, there is some chance that this is an includes relation
2358 # the only way to know for certain is whether the rest of the
2359 # production derives empty
2361 li = lr_index + 1
2362 while li < p.len:
2363 if p.prod[li] in self.grammar.Terminals:
2364 break # No, forget it
2365 if p.prod[li] not in nullable:
2366 break
2367 li = li + 1
2368 else:
2369 # Appears to be a relation between (j,t) and (state,N)
2370 includes.append((j, t))
2372 g = self.lr0_goto(C[j], t) # Go to next set
2373 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2375 # When we get here, j is the final state, now we have to locate the production
2376 for r in C[j]:
2377 if r.name != p.name:
2378 continue
2379 if r.len != p.len:
2380 continue
2381 i = 0
2382 # This loop is comparing a production ". A B C" with "A B C ."
2383 while i < r.lr_index:
2384 if r.prod[i] != p.prod[i+1]:
2385 break
2386 i = i + 1
2387 else:
2388 lookb.append((j, r))
2389 for i in includes:
2390 if i not in includedict:
2391 includedict[i] = []
2392 includedict[i].append((state, N))
2393 lookdict[(state, N)] = lookb
2395 return lookdict, includedict
2397 # -----------------------------------------------------------------------------
2398 # compute_read_sets()
2400 # Given a set of LR(0) items, this function computes the read sets.
2402 # Inputs: C = Set of LR(0) items
2403 # ntrans = Set of nonterminal transitions
2404 # nullable = Set of nullable non-terminals
2406 # Returns a set containing the read sets
2407 # -----------------------------------------------------------------------------
2409 def compute_read_sets(self, C, ntrans, nullable):
2410 FP = lambda x: self.dr_relation(C, x, nullable)
2411 R = lambda x: self.reads_relation(C, x, nullable)
2412 F = digraph(ntrans, R, FP)
2413 return F
2415 # -----------------------------------------------------------------------------
2416 # compute_follow_sets()
2418 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2419 # and an include set, this function computes the follow sets
2421 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2423 # Inputs:
2424 # ntrans = Set of nonterminal transitions
2425 # readsets = Readset (previously computed)
2426 # inclsets = Include sets (previously computed)
2428 # Returns a set containing the follow sets
2429 # -----------------------------------------------------------------------------
2431 def compute_follow_sets(self, ntrans, readsets, inclsets):
2432 FP = lambda x: readsets[x]
2433 R = lambda x: inclsets.get(x, [])
2434 F = digraph(ntrans, R, FP)
2435 return F
2437 # -----------------------------------------------------------------------------
2438 # add_lookaheads()
2440 # Attaches the lookahead symbols to grammar rules.
2442 # Inputs: lookbacks - Set of lookback relations
2443 # followset - Computed follow set
2445 # This function directly attaches the lookaheads to productions contained
2446 # in the lookbacks set
2447 # -----------------------------------------------------------------------------
2449 def add_lookaheads(self, lookbacks, followset):
2450 for trans, lb in lookbacks.items():
2451 # Loop over productions in lookback
2452 for state, p in lb:
2453 if state not in p.lookaheads:
2454 p.lookaheads[state] = []
2455 f = followset.get(trans, [])
2456 for a in f:
2457 if a not in p.lookaheads[state]:
2458 p.lookaheads[state].append(a)
2460 # -----------------------------------------------------------------------------
2461 # add_lalr_lookaheads()
2463 # This function does all of the work of adding lookahead information for use
2464 # with LALR parsing
2465 # -----------------------------------------------------------------------------
2467 def add_lalr_lookaheads(self, C):
2468 # Determine all of the nullable nonterminals
2469 nullable = self.compute_nullable_nonterminals()
2471 # Find all non-terminal transitions
2472 trans = self.find_nonterminal_transitions(C)
2474 # Compute read sets
2475 readsets = self.compute_read_sets(C, trans, nullable)
2477 # Compute lookback/includes relations
2478 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2480 # Compute LALR FOLLOW sets
2481 followsets = self.compute_follow_sets(trans, readsets, included)
2483 # Add all of the lookaheads
2484 self.add_lookaheads(lookd, followsets)
2486 # -----------------------------------------------------------------------------
2487 # lr_parse_table()
2489 # This function constructs the parse tables for SLR or LALR
2490 # -----------------------------------------------------------------------------
2491 def lr_parse_table(self):
2492 Productions = self.grammar.Productions
2493 Precedence = self.grammar.Precedence
2494 goto = self.lr_goto # Goto array
2495 action = self.lr_action # Action array
2496 log = self.log # Logger for output
2498 actionp = {} # Action production array (temporary)
2500 log.info('Parsing method: %s', self.lr_method)
2502 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2503 # This determines the number of states
2505 C = self.lr0_items()
2507 if self.lr_method == 'LALR':
2508 self.add_lalr_lookaheads(C)
2510 # Build the parser table, state by state
2511 st = 0
2512 for I in C:
2513 # Loop over each production in I
2514 actlist = [] # List of actions
2515 st_action = {}
2516 st_actionp = {}
2517 st_goto = {}
2518 log.info('')
2519 log.info('state %d', st)
2520 log.info('')
2521 for p in I:
2522 log.info(' (%d) %s', p.number, p)
2523 log.info('')
2525 for p in I:
2526 if p.len == p.lr_index + 1:
2527 if p.name == "S'":
2528 # Start symbol. Accept!
2529 st_action['$end'] = 0
2530 st_actionp['$end'] = p
2531 else:
2532 # We are at the end of a production. Reduce!
2533 if self.lr_method == 'LALR':
2534 laheads = p.lookaheads[st]
2535 else:
2536 laheads = self.grammar.Follow[p.name]
2537 for a in laheads:
2538 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2539 r = st_action.get(a)
2540 if r is not None:
2541 # Whoa. Have a shift/reduce or reduce/reduce conflict
2542 if r > 0:
2543 # Need to decide on shift or reduce here
2544 # By default we favor shifting; the precedence
2545 # rules below may override that.
2547 # Shift precedence comes from the token
2548 sprec, slevel = Precedence.get(a, ('right', 0))
2550 # Reduce precedence comes from rule being reduced (p)
2551 rprec, rlevel = Productions[p.number].prec
2553 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2554 # We really need to reduce here.
2555 st_action[a] = -p.number
2556 st_actionp[a] = p
2557 if not slevel and not rlevel:
2558 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2559 self.sr_conflicts.append((st, a, 'reduce'))
2560 Productions[p.number].reduced += 1
2561 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2562 st_action[a] = None
2563 else:
2564 # Hmmm. Guess we'll keep the shift
2565 if not rlevel:
2566 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2567 self.sr_conflicts.append((st, a, 'shift'))
2568 elif r < 0:
2569 # Reduce/reduce conflict. In this case, we favor the rule
2570 # that was defined first in the grammar file
2571 oldp = Productions[-r]
2572 pp = Productions[p.number]
2573 if oldp.line > pp.line:
2574 st_action[a] = -p.number
2575 st_actionp[a] = p
2576 chosenp, rejectp = pp, oldp
2577 Productions[p.number].reduced += 1
2578 Productions[oldp.number].reduced -= 1
2579 else:
2580 chosenp, rejectp = oldp, pp
2581 self.rr_conflicts.append((st, chosenp, rejectp))
2582 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2583 a, st_actionp[a].number, st_actionp[a])
2584 else:
2585 raise LALRError('Unknown conflict in state %d' % st)
2586 else:
2587 st_action[a] = -p.number
2588 st_actionp[a] = p
2589 Productions[p.number].reduced += 1
2590 else:
2591 i = p.lr_index
2592 a = p.prod[i+1] # Get symbol right after the "."
2593 if a in self.grammar.Terminals:
2594 g = self.lr0_goto(I, a)
2595 j = self.lr0_cidhash.get(id(g), -1)
2596 if j >= 0:
2597 # We are in a shift state
2598 actlist.append((a, p, 'shift and go to state %d' % j))
2599 r = st_action.get(a)
2600 if r is not None:
2601 # Whoa have a shift/reduce or shift/shift conflict
2602 if r > 0:
2603 if r != j:
2604 raise LALRError('Shift/shift conflict in state %d' % st)
2605 elif r < 0:
2606 # Do a precedence check.
2607 # - if precedence of reduce rule is higher, we reduce.
2608 # - if precedence of reduce is same and left assoc, we reduce.
2609 # - otherwise we shift
2611 # Shift precedence comes from the token
2612 sprec, slevel = Precedence.get(a, ('right', 0))
2614 # Reduce precedence comes from the rule that could have been reduced
2615 rprec, rlevel = Productions[st_actionp[a].number].prec
2617 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2618 # We decide to shift here... highest precedence to shift
2619 Productions[st_actionp[a].number].reduced -= 1
2620 st_action[a] = j
2621 st_actionp[a] = p
2622 if not rlevel:
2623 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2624 self.sr_conflicts.append((st, a, 'shift'))
2625 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2626 st_action[a] = None
2627 else:
2628 # Hmmm. Guess we'll keep the reduce
2629 if not slevel and not rlevel:
2630 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2631 self.sr_conflicts.append((st, a, 'reduce'))
2633 else:
2634 raise LALRError('Unknown conflict in state %d' % st)
2635 else:
2636 st_action[a] = j
2637 st_actionp[a] = p
2639 # Print the actions associated with each terminal
2640 _actprint = {}
2641 for a, p, m in actlist:
2642 if a in st_action:
2643 if p is st_actionp[a]:
2644 log.info(' %-15s %s', a, m)
2645 _actprint[(a, m)] = 1
2646 log.info('')
2647 # Print the actions that were not used. (debugging)
2648 not_used = 0
2649 for a, p, m in actlist:
2650 if a in st_action:
2651 if p is not st_actionp[a]:
2652 if (a, m) not in _actprint:
2653 log.debug(' ! %-15s [ %s ]', a, m)
2654 not_used = 1
2655 _actprint[(a, m)] = 1
2656 if not_used:
2657 log.debug('')
2659 # Construct the goto table for this state
2661 nkeys = {}
2662 for ii in I:
2663 for s in ii.usyms:
2664 if s in self.grammar.Nonterminals:
2665 nkeys[s] = None
2666 for n in nkeys:
2667 g = self.lr0_goto(I, n)
2668 j = self.lr0_cidhash.get(id(g), -1)
2669 if j >= 0:
2670 st_goto[n] = j
2671 log.info(' %-30s shift and go to state %d', n, j)
2673 action[st] = st_action
2674 actionp[st] = st_actionp
2675 goto[st] = st_goto
2676 st += 1
2678 # -----------------------------------------------------------------------------
2679 # write_table()
2681 # This function writes the LR parsing tables to a file
2682 # -----------------------------------------------------------------------------
2684 def write_table(self, tabmodule, outputdir='', signature=''):
2685 if isinstance(tabmodule, types.ModuleType):
2686 raise IOError("Won't overwrite existing tabmodule")
2688 basemodulename = tabmodule.split('.')[-1]
2689 filename = os.path.join(outputdir, basemodulename) + '.py'
2690 try:
2691 f = open(filename, 'w')
2693 f.write('''
2694 # %s
2695 # This file is automatically generated. Do not edit.
2696 # pylint: disable=W,C,R
2697 _tabversion = %r
2699 _lr_method = %r
2701 _lr_signature = %r
2702 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2704 # Change smaller to 0 to go back to original tables
2705 smaller = 1
2707 # Factor out names to try and make smaller
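# Illustrative note (not an exact dump): rather than one flat entry per
# (state, token) pair, e.g.
#     _lr_action = { (0, 'NUM'): 5, (2, 'NUM'): 5, ... }
# the factored form below groups states and actions by token name,
#     _lr_action_items = { 'NUM': ([0, 2, ...], [5, 5, ...]), ... }
# and the generated file rebuilds the nested dictionary at import time,
# which keeps parsetab.py considerably smaller.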
2708 if smaller:
2709 items = {}
2711 for s, nd in self.lr_action.items():
2712 for name, v in nd.items():
2713 i = items.get(name)
2714 if not i:
2715 i = ([], [])
2716 items[name] = i
2717 i[0].append(s)
2718 i[1].append(v)
2720 f.write('\n_lr_action_items = {')
2721 for k, v in items.items():
2722 f.write('%r:([' % k)
2723 for i in v[0]:
2724 f.write('%r,' % i)
2725 f.write('],[')
2726 for i in v[1]:
2727 f.write('%r,' % i)
2729 f.write(']),')
2730 f.write('}\n')
2732 f.write('''
2733 _lr_action = {}
2734 for _k, _v in _lr_action_items.items():
2735 for _x,_y in zip(_v[0],_v[1]):
2736 if not _x in _lr_action: _lr_action[_x] = {}
2737 _lr_action[_x][_k] = _y
2738 del _lr_action_items
2739 ''')
2741 else:
2742 f.write('\n_lr_action = { ')
2743 for k, v in self.lr_action.items():
2744 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2745 f.write('}\n')
2747 if smaller:
2748 # Factor out names to try and make smaller
2749 items = {}
2751 for s, nd in self.lr_goto.items():
2752 for name, v in nd.items():
2753 i = items.get(name)
2754 if not i:
2755 i = ([], [])
2756 items[name] = i
2757 i[0].append(s)
2758 i[1].append(v)
2760 f.write('\n_lr_goto_items = {')
2761 for k, v in items.items():
2762 f.write('%r:([' % k)
2763 for i in v[0]:
2764 f.write('%r,' % i)
2765 f.write('],[')
2766 for i in v[1]:
2767 f.write('%r,' % i)
2769 f.write(']),')
2770 f.write('}\n')
2772 f.write('''
2773 _lr_goto = {}
2774 for _k, _v in _lr_goto_items.items():
2775 for _x, _y in zip(_v[0], _v[1]):
2776 if not _x in _lr_goto: _lr_goto[_x] = {}
2777 _lr_goto[_x][_k] = _y
2778 del _lr_goto_items
2779 ''')
2780 else:
2781 f.write('\n_lr_goto = { ')
2782 for k, v in self.lr_goto.items():
2783 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2784 f.write('}\n')
2786 # Write production table
2787 f.write('_lr_productions = [\n')
2788 for p in self.lr_productions:
2789 if p.func:
2790 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2791 p.func, os.path.basename(p.file), p.line))
2792 else:
2793 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2794 f.write(']\n')
2795 f.close()
2797 except IOError:
2798 raise
2801 # -----------------------------------------------------------------------------
2802 # pickle_table()
2804 # This function pickles the LR parsing tables to a supplied file object
2805 # -----------------------------------------------------------------------------
2807 def pickle_table(self, filename, signature=''):
2808 try:
2809 import cPickle as pickle
2810 except ImportError:
2811 import pickle
2812 with open(filename, 'wb') as outf:
2813 pickle.dump(__tabversion__, outf, pickle_protocol)
2814 pickle.dump(self.lr_method, outf, pickle_protocol)
2815 pickle.dump(signature, outf, pickle_protocol)
2816 pickle.dump(self.lr_action, outf, pickle_protocol)
2817 pickle.dump(self.lr_goto, outf, pickle_protocol)
2819 outp = []
2820 for p in self.lr_productions:
2821 if p.func:
2822 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2823 else:
2824 outp.append((str(p), p.name, p.len, None, None, None))
2825 pickle.dump(outp, outf, pickle_protocol)
2827 # -----------------------------------------------------------------------------
2828 # === INTROSPECTION ===
2830 # The following functions and classes are used to implement the PLY
2831 # introspection features followed by the yacc() function itself.
2832 # -----------------------------------------------------------------------------
2834 # -----------------------------------------------------------------------------
2835 # get_caller_module_dict()
2837 # This function returns a dictionary containing all of the symbols defined within
2838 # a caller further down the call stack. This is used to get the environment
2839 # associated with the yacc() call if none was provided.
2840 # -----------------------------------------------------------------------------
2842 def get_caller_module_dict(levels):
2843 f = sys._getframe(levels)
2844 ldict = f.f_globals.copy()
2845 if f.f_globals != f.f_locals:
2846 ldict.update(f.f_locals)
2847 return ldict
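# Illustrative note (not part of PLY): get_caller_module_dict(2) is how
# yacc() below picks up the caller's namespace when no module= argument is
# given, so that top-level names such as 'tokens', 'precedence' and the
# p_* rule functions can be found by reflection.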
2849 # -----------------------------------------------------------------------------
2850 # parse_grammar()
2852 # This takes a raw grammar rule string and parses it into production data
2853 # -----------------------------------------------------------------------------
2854 def parse_grammar(doc, file, line):
2855 grammar = []
2856 # Split the doc string into lines
2857 pstrings = doc.splitlines()
2858 lastp = None
2859 dline = line
2860 for ps in pstrings:
2861 dline += 1
2862 p = ps.split()
2863 if not p:
2864 continue
2865 try:
2866 if p[0] == '|':
2867 # This is a continuation of a previous rule
2868 if not lastp:
2869 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2870 prodname = lastp
2871 syms = p[1:]
2872 else:
2873 prodname = p[0]
2874 lastp = prodname
2875 syms = p[2:]
2876 assign = p[1]
2877 if assign != ':' and assign != '::=':
2878 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2880 grammar.append((file, dline, prodname, syms))
2881 except SyntaxError:
2882 raise
2883 except Exception:
2884 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
2886 return grammar
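# Illustrative sketch (not part of PLY): a hedged example of what
# parse_grammar() produces, never called by PLY itself:
def _parse_grammar_demo():
    doc = '''expression : expression PLUS term
                        | term'''
    return parse_grammar(doc, '<demo>', 1)
    # -> [('<demo>', 2, 'expression', ['expression', 'PLUS', 'term']),
    #     ('<demo>', 3, 'expression', ['term'])]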
2888 # -----------------------------------------------------------------------------
2889 # ParserReflect()
2891 # This class represents information extracted for building a parser including
2892 # start symbol, error function, tokens, precedence list, action functions,
2893 # etc.
2894 # -----------------------------------------------------------------------------
2895 class ParserReflect(object):
2896 def __init__(self, pdict, log=None):
2897 self.pdict = pdict
2898 self.start = None
2899 self.error_func = None
2900 self.tokens = None
2901 self.modules = set()
2902 self.grammar = []
2903 self.error = False
2905 if log is None:
2906 self.log = PlyLogger(sys.stderr)
2907 else:
2908 self.log = log
2910 # Get all of the basic information
2911 def get_all(self):
2912 self.get_start()
2913 self.get_error_func()
2914 self.get_tokens()
2915 self.get_precedence()
2916 self.get_pfunctions()
2918 # Validate all of the information
2919 def validate_all(self):
2920 self.validate_start()
2921 self.validate_error_func()
2922 self.validate_tokens()
2923 self.validate_precedence()
2924 self.validate_pfunctions()
2925 self.validate_modules()
2926 return self.error
2928 # Compute a signature over the grammar
2929 def signature(self):
2930 parts = []
2931 try:
2932 if self.start:
2933 parts.append(self.start)
2934 if self.prec:
2935 parts.append(''.join([''.join(p) for p in self.prec]))
2936 if self.tokens:
2937 parts.append(' '.join(self.tokens))
2938 for f in self.pfuncs:
2939 if f[3]:
2940 parts.append(f[3])
2941 except (TypeError, ValueError):
2942 pass
2943 return ''.join(parts)
2945 # -----------------------------------------------------------------------------
2946 # validate_modules()
2948 # This method checks to see if there are duplicated p_rulename() functions
2949 # in the parser module file. Without this function, it is really easy for
2950 # users to make mistakes by cutting and pasting code fragments (and it's a real
2951 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2952 # we just do a little regular expression pattern matching of def statements
2953 # to try and detect duplicates.
2954 # -----------------------------------------------------------------------------
2956 def validate_modules(self):
2957 # Match def p_funcname(
2958 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2960 for module in self.modules:
2961 try:
2962 lines, linen = inspect.getsourcelines(module)
2963 except IOError:
2964 continue
2966 counthash = {}
2967 for linen, line in enumerate(lines):
2968 linen += 1
2969 m = fre.match(line)
2970 if m:
2971 name = m.group(1)
2972 prev = counthash.get(name)
2973 if not prev:
2974 counthash[name] = linen
2975 else:
2976 filename = inspect.getsourcefile(module)
2977 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
2978 filename, linen, name, prev)
2980 # Get the start symbol
2981 def get_start(self):
2982 self.start = self.pdict.get('start')
2984 # Validate the start symbol
2985 def validate_start(self):
2986 if self.start is not None:
2987 if not isinstance(self.start, string_types):
2988 self.log.error("'start' must be a string")
2990 # Look for error handler
2991 def get_error_func(self):
2992 self.error_func = self.pdict.get('p_error')
2994 # Validate the error function
2995 def validate_error_func(self):
2996 if self.error_func:
2997 if isinstance(self.error_func, types.FunctionType):
2998 ismethod = 0
2999 elif isinstance(self.error_func, types.MethodType):
3000 ismethod = 1
3001 else:
3002 self.log.error("'p_error' defined, but is not a function or method")
3003 self.error = True
3004 return
3006 eline = self.error_func.__code__.co_firstlineno
3007 efile = self.error_func.__code__.co_filename
3008 module = inspect.getmodule(self.error_func)
3009 self.modules.add(module)
3011 argcount = self.error_func.__code__.co_argcount - ismethod
3012 if argcount != 1:
3013 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3014 self.error = True
3016 # Get the tokens map
3017 def get_tokens(self):
3018 tokens = self.pdict.get('tokens')
3019 if not tokens:
3020 self.log.error('No token list is defined')
3021 self.error = True
3022 return
3024 if not isinstance(tokens, (list, tuple)):
3025 self.log.error('tokens must be a list or tuple')
3026 self.error = True
3027 return
3029 if not tokens:
3030 self.log.error('tokens is empty')
3031 self.error = True
3032 return
3034 self.tokens = sorted(tokens)
3036 # Validate the tokens
3037 def validate_tokens(self):
3038 # Validate the tokens.
3039 if 'error' in self.tokens:
3040 self.log.error("Illegal token name 'error'. Is a reserved word")
3041 self.error = True
3042 return
3044 terminals = set()
3045 for n in self.tokens:
3046 if n in terminals:
3047 self.log.warning('Token %r multiply defined', n)
3048 terminals.add(n)
3050 # Get the precedence map (if any)
3051 def get_precedence(self):
3052 self.prec = self.pdict.get('precedence')
3054 # Validate and parse the precedence map
3055 def validate_precedence(self):
3056 preclist = []
3057 if self.prec:
3058 if not isinstance(self.prec, (list, tuple)):
3059 self.log.error('precedence must be a list or tuple')
3060 self.error = True
3061 return
3062 for level, p in enumerate(self.prec):
3063 if not isinstance(p, (list, tuple)):
3064 self.log.error('Bad precedence table')
3065 self.error = True
3066 return
3068 if len(p) < 2:
3069 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3070 self.error = True
3071 return
3072 assoc = p[0]
3073 if not isinstance(assoc, string_types):
3074 self.log.error('precedence associativity must be a string')
3075 self.error = True
3076 return
3077 for term in p[1:]:
3078 if not isinstance(term, string_types):
3079 self.log.error('precedence items must be strings')
3080 self.error = True
3081 return
3082 preclist.append((term, assoc, level+1))
3083 self.preclist = preclist
3085 # Get all p_functions from the grammar
3086 def get_pfunctions(self):
3087 p_functions = []
3088 for name, item in self.pdict.items():
3089 if not name.startswith('p_') or name == 'p_error':
3090 continue
3091 if isinstance(item, (types.FunctionType, types.MethodType)):
3092 line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
3093 module = inspect.getmodule(item)
3094 p_functions.append((line, module, name, item.__doc__))
3096 # Sort all of the actions by line number; make sure to stringify
3097 # modules to make them sortable, since `line` may not uniquely sort all
3098 # p functions
3099 p_functions.sort(key=lambda p_function: (
3100 p_function[0],
3101 str(p_function[1]),
3102 p_function[2],
3103 p_function[3]))
3104 self.pfuncs = p_functions
3106 # Validate all of the p_functions
3107 def validate_pfunctions(self):
3108 grammar = []
3109 # Check that at least one rule was defined
3110 if len(self.pfuncs) == 0:
3111 self.log.error('no rules of the form p_rulename are defined')
3112 self.error = True
3113 return
3115 for line, module, name, doc in self.pfuncs:
3116 file = inspect.getsourcefile(module)
3117 func = self.pdict[name]
3118 if isinstance(func, types.MethodType):
3119 reqargs = 2
3120 else:
3121 reqargs = 1
3122 if func.__code__.co_argcount > reqargs:
3123 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3124 self.error = True
3125 elif func.__code__.co_argcount < reqargs:
3126 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3127 self.error = True
3128 elif not func.__doc__:
3129 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3130 file, line, func.__name__)
3131 else:
3132 try:
3133 parsed_g = parse_grammar(doc, file, line)
3134 for g in parsed_g:
3135 grammar.append((name, g))
3136 except SyntaxError as e:
3137 self.log.error(str(e))
3138 self.error = True
3140 # Looks like a valid grammar rule
3141 # Record the module in which it is defined.
3142 self.modules.add(module)
3144 # Secondary validation step that looks for p_ definitions that are not functions
3145 # or functions that look like they might be grammar rules.
3147 for n, v in self.pdict.items():
3148 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3149 continue
3150 if n.startswith('t_'):
3151 continue
3152 if n.startswith('p_') and n != 'p_error':
3153 self.log.warning('%r not defined as a function', n)
3154 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3155 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3156 if v.__doc__:
3157 try:
3158 doc = v.__doc__.split(' ')
3159 if doc[1] == ':':
3160 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3161 v.__code__.co_filename, v.__code__.co_firstlineno, n)
3162 except IndexError:
3163 pass
3165 self.grammar = grammar
3167 # -----------------------------------------------------------------------------
3168 # yacc(module)
3170 # Build a parser
3171 # -----------------------------------------------------------------------------
3173 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3174 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3175 outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3177 if tabmodule is None:
3178 tabmodule = tab_module
3180 # Reference to the parsing method of the last built parser
3181 global parse
3183 # If pickling is enabled, table files are not created
3184 if picklefile:
3185 write_tables = 0
3187 if errorlog is None:
3188 errorlog = PlyLogger(sys.stderr)
3190 # Get the module dictionary used for the parser
3191 if module:
3192 _items = [(k, getattr(module, k)) for k in dir(module)]
3193 pdict = dict(_items)
3194 # If no __file__ or __package__ attributes are available, try to obtain them
3195 # from the __module__ instead
3196 if '__file__' not in pdict:
3197 pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3198 if '__package__' not in pdict and '__module__' in pdict:
3199 if hasattr(sys.modules[pdict['__module__']], '__package__'):
3200 pdict['__package__'] = sys.modules[pdict['__module__']].__package__
3201 else:
3202 pdict = get_caller_module_dict(2)
3204 if outputdir is None:
3205 # If no output directory is set, the location of the output files
3206 # is determined according to the following rules:
3207 # - If tabmodule specifies a package, files go into that package directory
3208 # - Otherwise, files go in the same directory as the specifying module
3209 if isinstance(tabmodule, types.ModuleType):
3210 srcfile = tabmodule.__file__
3211 else:
3212 if '.' not in tabmodule:
3213 srcfile = pdict['__file__']
3214 else:
3215 parts = tabmodule.split('.')
3216 pkgname = '.'.join(parts[:-1])
3217 exec('import %s' % pkgname)
3218 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3219 outputdir = os.path.dirname(srcfile)
3221 # Determine whether the module is part of a package.
3222 # If so, fix the tabmodule setting so that tables load correctly
3223 pkg = pdict.get('__package__')
3224 if pkg and isinstance(tabmodule, str):
3225 if '.' not in tabmodule:
3226 tabmodule = pkg + '.' + tabmodule
3230 # Set start symbol if it's specified directly using an argument
3231 if start is not None:
3232 pdict['start'] = start
3234 # Collect parser information from the dictionary
3235 pinfo = ParserReflect(pdict, log=errorlog)
3236 pinfo.get_all()
3238 if pinfo.error:
3239 raise YaccError('Unable to build parser')
3241 # Check signature against table files (if any)
3242 signature = pinfo.signature()
3244 # Read the tables
3245 try:
3246 lr = LRTable()
3247 if picklefile:
3248 read_signature = lr.read_pickle(picklefile)
3249 else:
3250 read_signature = lr.read_table(tabmodule)
3251 if optimize or (read_signature == signature):
3252 try:
3253 lr.bind_callables(pinfo.pdict)
3254 parser = LRParser(lr, pinfo.error_func)
3255 parse = parser.parse
3256 return parser
3257 except Exception as e:
3258 errorlog.warning('There was a problem loading the table file: %r', e)
3259 except VersionError as e:
3260 errorlog.warning(str(e))
3261 except ImportError:
3262 pass
3264 if debuglog is None:
3265 if debug:
3266 try:
3267 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3268 except IOError as e:
3269 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3270 debuglog = NullLogger()
3271 else:
3272 debuglog = NullLogger()
3274 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3276 errors = False
3278 # Validate the parser information
3279 if pinfo.validate_all():
3280 raise YaccError('Unable to build parser')
3282 if not pinfo.error_func:
3283 errorlog.warning('no p_error() function is defined')
3285 # Create a grammar object
3286 grammar = Grammar(pinfo.tokens)
3288 # Set precedence level for terminals
3289 for term, assoc, level in pinfo.preclist:
3290 try:
3291 grammar.set_precedence(term, assoc, level)
3292 except GrammarError as e:
3293 errorlog.warning('%s', e)
3295 # Add productions to the grammar
3296 for funcname, gram in pinfo.grammar:
3297 file, line, prodname, syms = gram
3298 try:
3299 grammar.add_production(prodname, syms, funcname, file, line)
3300 except GrammarError as e:
3301 errorlog.error('%s', e)
3302 errors = True
3304 # Set the grammar start symbol
3305 try:
3306 if start is None:
3307 grammar.set_start(pinfo.start)
3308 else:
3309 grammar.set_start(start)
3310 except GrammarError as e:
3311 errorlog.error(str(e))
3312 errors = True
3314 if errors:
3315 raise YaccError('Unable to build parser')
3317 # Verify the grammar structure
3318 undefined_symbols = grammar.undefined_symbols()
3319 for sym, prod in undefined_symbols:
3320 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3321 errors = True
3323 unused_terminals = grammar.unused_terminals()
3324 if unused_terminals:
3325 debuglog.info('')
3326 debuglog.info('Unused terminals:')
3327 debuglog.info('')
3328 for term in unused_terminals:
3329 errorlog.warning('Token %r defined, but not used', term)
3330 debuglog.info(' %s', term)
3332 # Print out all productions to the debug log
3333 if debug:
3334 debuglog.info('')
3335 debuglog.info('Grammar')
3336 debuglog.info('')
3337 for n, p in enumerate(grammar.Productions):
3338 debuglog.info('Rule %-5d %s', n, p)
3340 # Find unused non-terminals
3341 unused_rules = grammar.unused_rules()
3342 for prod in unused_rules:
3343 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3345 if len(unused_terminals) == 1:
3346 errorlog.warning('There is 1 unused token')
3347 if len(unused_terminals) > 1:
3348 errorlog.warning('There are %d unused tokens', len(unused_terminals))
3350 if len(unused_rules) == 1:
3351 errorlog.warning('There is 1 unused rule')
3352 if len(unused_rules) > 1:
3353 errorlog.warning('There are %d unused rules', len(unused_rules))
3355 if debug:
3356 debuglog.info('')
3357 debuglog.info('Terminals, with rules where they appear')
3358 debuglog.info('')
3359 terms = list(grammar.Terminals)
3360 terms.sort()
3361 for term in terms:
3362 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3364 debuglog.info('')
3365 debuglog.info('Nonterminals, with rules where they appear')
3366 debuglog.info('')
3367 nonterms = list(grammar.Nonterminals)
3368 nonterms.sort()
3369 for nonterm in nonterms:
3370 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3371 debuglog.info('')
3373 if check_recursion:
3374 unreachable = grammar.find_unreachable()
3375 for u in unreachable:
3376 errorlog.warning('Symbol %r is unreachable', u)
3378 infinite = grammar.infinite_cycles()
3379 for inf in infinite:
3380 errorlog.error('Infinite recursion detected for symbol %r', inf)
3381 errors = True
3383 unused_prec = grammar.unused_precedence()
3384 for term, assoc in unused_prec:
3385 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3386 errors = True
3388 if errors:
3389 raise YaccError('Unable to build parser')
3391 # Run the LRGeneratedTable on the grammar
3392 if debug:
3393 errorlog.debug('Generating %s tables', method)
3395 lr = LRGeneratedTable(grammar, method, debuglog)
3397 if debug:
3398 num_sr = len(lr.sr_conflicts)
3400 # Report shift/reduce and reduce/reduce conflicts
3401 if num_sr == 1:
3402 errorlog.warning('1 shift/reduce conflict')
3403 elif num_sr > 1:
3404 errorlog.warning('%d shift/reduce conflicts', num_sr)
3406 num_rr = len(lr.rr_conflicts)
3407 if num_rr == 1:
3408 errorlog.warning('1 reduce/reduce conflict')
3409 elif num_rr > 1:
3410 errorlog.warning('%d reduce/reduce conflicts', num_rr)
3412 # Write out conflicts to the output file
3413 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3414 debuglog.warning('')
3415 debuglog.warning('Conflicts:')
3416 debuglog.warning('')
3418 for state, tok, resolution in lr.sr_conflicts:
3419 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
3421 already_reported = set()
3422 for state, rule, rejected in lr.rr_conflicts:
3423 if (state, id(rule), id(rejected)) in already_reported:
3424 continue
3425 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3426 debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3427 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3428 errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3429 already_reported.add((state, id(rule), id(rejected)))
3431 warned_never = []
3432 for state, rule, rejected in lr.rr_conflicts:
3433 if not rejected.reduced and (rejected not in warned_never):
3434 debuglog.warning('Rule (%s) is never reduced', rejected)
3435 errorlog.warning('Rule (%s) is never reduced', rejected)
3436 warned_never.append(rejected)
3438 # Write the table file if requested
3439 if write_tables:
3440 try:
3441 lr.write_table(tabmodule, outputdir, signature)
3442 if tabmodule in sys.modules:
3443 del sys.modules[tabmodule]
3444 except IOError as e:
3445 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3447 # Write a pickled version of the tables
3448 if picklefile:
3449 try:
3450 lr.pickle_table(picklefile, signature)
3451 except IOError as e:
3452 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3454 # Build the parser
3455 lr.bind_callables(pinfo.pdict)
3456 parser = LRParser(lr, pinfo.error_func)
3458 parse = parser.parse
3459 return parser
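# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of PLY).  A minimal client module in
# the usual PLY style; 'calclex', its token list, and 'lexer' are
# hypothetical names:
#
#     import ply.yacc as yacc
#     from calclex import tokens, lexer
#
#     def p_expression_plus(p):
#         'expression : expression PLUS term'
#         p[0] = p[1] + p[3]
#
#     def p_expression_term(p):
#         'expression : term'
#         p[0] = p[1]
#
#     def p_term_num(p):
#         'term : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print('Syntax error at', p)
#
#     parser = yacc.yacc()               # build tables from the rules above
#     result = parser.parse('1 + 2', lexer=lexer)
# ---------------------------------------------------------------------------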