# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2018
# David M. Beazley (Dabeaz LLC)
# SPDX-License-Identifier: BSD-3-Clause
#
#                          :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style."   Modify the code at your peril!
# ----------------------------------------------------------------------------
import re
import types
import sys
import os.path
import warnings

__tabversion__ = '3.10'
#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------
yaccdebug = True             # Debugging mode.  If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'      # Default name of the table module
default_lr = 'LALR'          # Default LR table generation method

error_count = 3              # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False            # Set to True if developing yacc.  This turns off optimized
                             # implementations of certain functions.

resultlimit = 40             # Size limit of results when running in debug mode.

pickle_protocol = 0          # Protocol to use when writing pickle files
# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize         # Used by the digraph()/traverse() routines below
# This object is a stand-in for a logging object created by the
# logging module.   PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.
class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug
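# A usage sketch (illustrative, not part of the original module): PlyLogger
# wraps any writable file-like object; yacc() uses one internally when it
# writes the parser.out file.
#
#     log = PlyLogger(sys.stderr)
#     log.info('created table for %s', 'expr')      # "created table for expr"
#     log.warning('%d shift/reduce conflicts', 2)   # "WARNING: 2 shift/reduce conflicts"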
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self
# Exception raised for yacc-related errors
class YaccError(Exception):
    pass
# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result
# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
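# Example behavior (illustrative): short values keep their repr, long ones
# are reduced to a type and id so that debug logs stay readable.
#
#     format_stack_entry([1, 2])     # -> "[1, 2]"
#     format_stack_entry('x' * 500)  # -> "<str @ 0x...>"
#     format_result('x' * 500)       # -> "<str @ 0x...> ('xxxxx... ...)"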
# Panic mode error recovery support.   This feature is being reworked--much of the
# code here is to offer a deprecation/backwards compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()
# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r
#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------
# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)
# This class is a wrapper around the objects actually passed to each
# grammar rule.   Index lookup and assignment actually read and write the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).   The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.
class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def set_lexpos(self, n, lexpos):
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
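# For example (illustrative): inside a grammar rule the YaccProduction
# instance is the 'p' argument, so the indexing below routes through the
# __getitem__/__setitem__ and position helpers above:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]        # reads/writes .value of each symbol
#         line = p.lineno(2)        # line number of the PLUS token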
# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)
    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See:  http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}
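    # For example (illustrative): if the action table entry for state 5 were
    # {'PLUS': -3, 'TIMES': -3, '$end': -3}, every lookahead leads to the same
    # reduction by rule 3, so set_defaulted_states() records {5: -3} and the
    # engine can reduce in state 5 without asking the lexer for a token first.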
    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
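    # Typical dispatch behavior (illustrative sketch, assuming a parser built
    # by yacc.yacc()):
    #
    #     parser.parse(data)                  # -> parseopt_notrack (fastest path)
    #     parser.parse(data, tracking=True)   # -> parseopt (line/position tracking)
    #     parser.parse(data, debug=True)      # -> parsedebug, logging to stderr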
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.   Optimized versions of this function
    # are automatically created by the ply/ygen.py script.   This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])
                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parsedebug-end
646 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
649 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
650 # This code is automatically generated by the ply/ygen.py script. Make
651 # changes to the parsedebug() method instead.
652 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-end
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-notrack-end
# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------
# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------
class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction
        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = self.Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
# This class serves as a minimal standin for Production objects when
# reading table data from files.   It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here
# basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
#       number     - Production number.
#
#       lr_next    - Next LR item. Example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------
class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'
# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
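# For example (illustrative): with terminals {'PLUS', 'NUMBER'},
# rightmost_terminal(['expr', 'PLUS', 'term'], terminals) returns 'PLUS',
# which is how a rule without %prec inherits the precedence of its
# rightmost terminal.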
# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass
class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]   # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames = {}         # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap = {}           # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals = {}         # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First = {}             # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}            # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}        # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]
    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
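    # For example (illustrative), the yacc() front end turns a user
    # specification such as
    #
    #     precedence = (('left', 'PLUS', 'MINUS'),
    #                   ('left', 'TIMES', 'DIVIDE'))
    #
    # into calls like g.set_precedence('PLUS', 'left', 1) and
    # g.set_precedence('TIMES', 'left', 2), with level increasing downward.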
    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.   For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most non-terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):
        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
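    # A small hand-built example (illustrative, not part of the module):
    #
    #     g = Grammar(['NUMBER', 'PLUS'])
    #     g.add_production('expr', ['expr', 'PLUS', 'term'])
    #     g.add_production('expr', ['term'])
    #     g.add_production('term', ['NUMBER'])
    #     g.set_start('expr')     # installs production 0: S' -> expr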
    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start
    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]
    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    result.append((s, p))
        return result
    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok
    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod
    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname, self.Precedence[termname][0]))

        return unused
    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first1, the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self, beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
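    # For example (illustrative): with productions A -> <empty> and A -> 'a',
    # FIRST(A) is ['a', '<empty>'], so _first(('A', 'b')) yields ['a', 'b']
    # because A can vanish and expose the following symbol 'b'.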
    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First
    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = ['$end']

        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Here is the production set
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow
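    # For example (illustrative): in the grammar
    #
    #     expr : expr PLUS term
    #     expr : term
    #
    # FOLLOW(expr) is ['$end', 'PLUS'], and FOLLOW(term) picks up all of
    # FOLLOW(expr) because 'term' ends both productions.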
    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.   For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while True:
                if i > p.len:
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError):
    pass
class LRTable(object):
    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None

    def read_table(self, module):
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            exec('import %s' % module)
            parsetab = sys.modules[module]

        if parsetab._tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature
    def read_pickle(self, filename):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        if not os.path.exists(filename):
            raise ImportError

        in_f = open(filename, 'rb')

        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature
    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)
# -----------------------------------------------------------------------------
#                           === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------
def digraph(X, R, FP):
    N = {}
    for x in X:
        N[x] = 0
    stack = []
    F = {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

def traverse(x, N, stack, F, X, R, FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
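# For example (illustrative): with X = ['a', 'b'], R('a') == ['b'],
# R('b') == [], and FP(x) == {'a': [1], 'b': [2]}[x], digraph(X, R, FP)
# returns {'a': [1, 2], 'b': [2]}: each F(x) is F'(x) unioned with F(y)
# for every y reachable from x through R.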
class LALRError(YaccError):
    pass
# -----------------------------------------------------------------------------
#                          == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self, grammar, method='LALR', log=None):
        if method not in ['SLR', 'LALR']:
            raise LALRError('Unsupported method %s' % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}                          # Action table
        self.lr_goto = {}                            # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}                      # Cache of computed gotos
        self.lr0_cidhash = {}                        # Cache of closures

        self._add_count = 0                          # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []                          # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self, I):
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = True
        while didadd:
            didadd = False
            for j in J:
                for x in j.lr_after:
                    if getattr(x, 'lr0_added', 0) == self._add_count:
                        continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = True

        return J
    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.   This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self, I, x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g
    # Compute the LR(0) sets of item function
    def lr0_items(self):
        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = {}
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I, x)
                if not g or id(g) in self.lr0_cidhash:
                    continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C
    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #  McGraw-Hill Book Company, (1985).
    #
    # -----------------------------------------------------------------------------
    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a dictionary containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------

    def compute_nullable_nonterminals(self):
        nullable = set()
        num_nullable = 0
        while True:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable.add(p.name)
                    continue
                for t in p.prod:
                    if t not in nullable:
                        break
                else:
                    nullable.add(p.name)
            if len(nullable) == num_nullable:
                break
            num_nullable = len(nullable)
        return nullable
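    # For example (illustrative): given A -> <empty>, B -> A A and C -> B 'c',
    # the fixed point is {'A', 'B'}; C is not nullable because the terminal
    # 'c' can never derive an empty string.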
    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions. These are transitions in which a dot appears immediately before
    # a non-terminal. Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------
    def find_nonterminal_transitions(self, C):
        trans = []
        for stateno, state in enumerate(C):
            for p in state:
                if p.lr_index < p.len - 1:
                    t = (stateno, p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans:
                            trans.append(t)
        return trans
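    # Illustrative example (hypothetical state numbers): if state 0 contains
    # the item S' -> . S and state 4 contains E -> E PLUS . T, the returned
    # list includes (0, 'S') and (4, 'T') -- one tuple for each state in
    # which a dot appears immediately before a nonterminal.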
    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions. The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------
    def dr_relation(self, C, trans, nullable):
        state, N = trans
        terms = []

        g = self.lr0_goto(C[state], N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms:
                        terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms
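    # Illustrative example (hypothetical): DR(p,A) collects the terminals
    # that can be read immediately after traversing the A-transition out of
    # state p. If goto(C[p], A) contains an item E -> A . PLUS T, then
    # 'PLUS' belongs to DR(p,A), since the dot now sits in front of a
    # terminal.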
    # -----------------------------------------------------------------------------
    # reads_relation()
    #
    # Computes the READS() relation (p,A) READS (t,C).
    # -----------------------------------------------------------------------------
    def reads_relation(self, C, trans, empty):
        # Look for empty transitions
        rel = []
        state, N = trans

        g = self.lr0_goto(C[state], N)
        j = self.lr0_cidhash.get(id(g), -1)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index + 1]
                if a in empty:
                    rel.append((j, a))

        return rel
    # -----------------------------------------------------------------------------
    # compute_lookback_includes()
    #
    # Determines the lookback and includes relations
    #
    # LOOKBACK:
    #
    # This relation is determined by running the LR(0) state machine forward.
    # For example, starting with a production "N : . A B C", we run it forward
    # to obtain "N : A B C ."  We then build a relationship between this final
    # state and the starting state. These relationships are stored in a dictionary
    # lookdict.
    #
    # INCLUDES:
    #
    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
    #
    # This relation is used to determine non-terminal transitions that occur
    # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
    # if the following holds:
    #
    #       B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string. State p' must lead to state p with the string L.
    # -----------------------------------------------------------------------------
    def compute_lookback_includes(self, C, trans, nullable):
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match. We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes. Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # No forget it
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        lookb.append((j, r))

            for i in includes:
                if i not in includedict:
                    includedict[i] = []
                includedict[i].append((state, N))
            lookdict[(state, N)] = lookb

        return lookdict, includedict
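    # Illustrative INCLUDES example (hypothetical grammar fragment): given
    #
    #     stmt     : IF expr opt_else
    #     opt_else : <empty>
    #
    # the 'expr' transition INCLUDES the 'stmt' transition, because
    # everything after 'expr' (here opt_else) can derive the empty string;
    # consequently, any lookahead that follows 'stmt' also follows 'expr'.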
    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        = Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of empty transitions
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------
    def compute_read_sets(self, C, ntrans, nullable):
        FP = lambda x: self.dr_relation(C, x, nullable)
        R = lambda x: self.reads_relation(C, x, nullable)
        F = digraph(ntrans, R, FP)
        return F
    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    #       Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    #            ntrans   = Set of nonterminal transitions
    #            readsets = Readset (previously computed)
    #            inclsets = Include sets (previously computed)
    #
    # Returns a set containing the follow sets
    # -----------------------------------------------------------------------------
    def compute_follow_sets(self, ntrans, readsets, inclsets):
        FP = lambda x: readsets[x]
        R = lambda x: inclsets.get(x, [])
        F = digraph(ntrans, R, FP)
        return F
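    # Both read sets and follow sets are least solutions of equations of the
    # form F(x) = FP(x) U { F(y) | x R y }, which is exactly what the
    # digraph() helper computes, so the two methods above differ only in the
    # FP and R callbacks they supply.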
    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead symbols to grammar rules.
    #
    # Inputs:    lookbacks  - Set of lookback relations
    #            followset  - Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------
    def add_lookaheads(self, lookbacks, followset):
        for trans, lb in lookbacks.items():
            # Loop over productions in lookback
            for state, p in lb:
                if state not in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans, [])
                for a in f:
                    if a not in p.lookaheads[state]:
                        p.lookaheads[state].append(a)
    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------
    def add_lalr_lookaheads(self, C):
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C, trans, nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C, trans, nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans, readsets, included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd, followsets)
    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action      # Action array
        log    = self.log            # Logger for output

        actionp = {}                 # Action production array (temporary)

        log.info('Parsing method: %s', self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = []              # List of actions
            st_action  = {}
            st_actionp = {}
            st_goto    = {}
            log.info('')
            log.info('state %d', st)
            log.info('')
            for p in I:
                log.info('    (%d) %s', p.number, p)
            log.info('')

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action['$end'] = 0
                        st_actionp['$end'] = p
                    else:
                        # We are at the end of a production. Reduce!
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.

                                    # Shift precedence comes from the token
                                    sprec, slevel = Precedence.get(a, ('right', 0))

                                    # Reduce precedence comes from rule being reduced (p)
                                    rprec, rlevel = Productions[p.number].prec

                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict. In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = pp
                                        chosenp, rejectp = pp, oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp, rejectp = oldp, pp
                                    self.rr_conflicts.append((st, chosenp, rejectp))
                                    log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                             a, st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I, a)
                        j = self.lr0_cidhash.get(id(g), -1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a, p, 'shift and go to state %d' % j))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa. Have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError('Shift/shift conflict in state %d' % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift

                                    # Shift precedence comes from the token
                                    sprec, slevel = Precedence.get(a, ('right', 0))

                                    # Reduce precedence comes from the rule that could have been reduced
                                    rprec, rlevel = Productions[st_actionp[a].number].prec

                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))
                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = {}
            for a, p, m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info('    %-15s %s', a, m)
                        _actprint[(a, m)] = 1
            log.info('')

            # Print the actions that were not used. (debugging)
            not_used = 0
            for a, p, m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a, m) in _actprint:
                            log.debug('  ! %-15s [ %s ]', a, m)
                            not_used = 1
                            _actprint[(a, m)] = 1
            if not_used:
                log.debug('')

            # Construct the goto table for this state
            nkeys = {}
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I, n)
                j = self.lr0_cidhash.get(id(g), -1)
                if j >= 0:
                    st_goto[n] = j
                    log.info('    %-30s shift and go to state %d', n, j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
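    # Illustrative example of the precedence resolution above (hypothetical
    # grammar): with
    #
    #     precedence = (('left', 'PLUS'), ('left', 'TIMES'))
    #     expr : expr PLUS expr
    #          | expr TIMES expr
    #
    # the shift/reduce conflict between reducing "expr PLUS expr" and
    # shifting TIMES is resolved as a shift, because TIMES has the higher
    # level (2 versus 1); at equal levels, 'left' associativity selects the
    # reduce instead.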
    # -----------------------------------------------------------------------------
    # write_table()
    #
    # This function writes the LR parsing tables to a file
    # -----------------------------------------------------------------------------
    def write_table(self, tabmodule, outputdir='', signature=''):
        if isinstance(tabmodule, types.ModuleType):
            raise IOError("Won't overwrite existing tabmodule")

        basemodulename = tabmodule.split('.')[-1]
        filename = os.path.join(outputdir, basemodulename) + '.py'
        try:
            f = open(filename, 'w')

            f.write('''
# %s
# This file is automatically generated. Do not edit.
# pylint: disable=W,C,R
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller
            if smaller:
                items = {}

                for s, nd in self.lr_action.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write('\n_lr_action_items = {')
                for k, v in items.items():
                    f.write('%r:([' % k)
                    for i in v[0]:
                        f.write('%r,' % i)
                    f.write('],[')
                    for i in v[1]:
                        f.write('%r,' % i)
                    f.write(']),')
                f.write('}\n')

                f.write('''
_lr_action = {}
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action:  _lr_action[_x] = {}
      _lr_action[_x][_k] = _y
del _lr_action_items
''')
            else:
                f.write('\n_lr_action = { ')
                for k, v in self.lr_action.items():
                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
                f.write('}\n')

            if smaller:
                # Factor out names to try and make smaller
                items = {}

                for s, nd in self.lr_goto.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write('\n_lr_goto_items = {')
                for k, v in items.items():
                    f.write('%r:([' % k)
                    for i in v[0]:
                        f.write('%r,' % i)
                    f.write('],[')
                    for i in v[1]:
                        f.write('%r,' % i)
                    f.write(']),')
                f.write('}\n')

                f.write('''
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
   for _x, _y in zip(_v[0], _v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = {}
       _lr_goto[_x][_k] = _y
del _lr_goto_items
''')
            else:
                f.write('\n_lr_goto = { ')
                for k, v in self.lr_goto.items():
                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
                f.write('}\n')

            # Write production table
            f.write('_lr_productions = [\n')
            for p in self.lr_productions:
                if p.func:
                    f.write('  (%r,%r,%d,%r,%r,%d),\n' %
                            (p.str, p.name, p.len,
                             p.func, os.path.basename(p.file), p.line))
                else:
                    f.write('  (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
            f.write(']\n')
            f.close()

        except IOError:
            raise
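    # Sketch of the factored format written above (hypothetical values): the
    # generated table module stores, for each symbol, parallel lists of
    # states and actions,
    #
    #     _lr_action_items = {'PLUS':([0, 3], [5, -2]),}
    #
    # which the embedded loop expands back into the nested
    # _lr_action[state][symbol] dictionary when the module is imported.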
    # -----------------------------------------------------------------------------
    # pickle_table()
    #
    # This function pickles the LR parsing tables to a supplied file object
    # -----------------------------------------------------------------------------
    def pickle_table(self, filename, signature=''):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        with open(filename, 'wb') as outf:
            pickle.dump(__tabversion__, outf, pickle_protocol)
            pickle.dump(self.lr_method, outf, pickle_protocol)
            pickle.dump(signature, outf, pickle_protocol)
            pickle.dump(self.lr_action, outf, pickle_protocol)
            pickle.dump(self.lr_goto, outf, pickle_protocol)

            outp = []
            for p in self.lr_productions:
                if p.func:
                    outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
                else:
                    outp.append((str(p), p.name, p.len, None, None, None))
            pickle.dump(outp, outf, pickle_protocol)
# -----------------------------------------------------------------------------
#                            === INTROSPECTION ===
#
# The following functions and classes are used to implement the PLY
# introspection features followed by the yacc() function itself.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack. This is used to get the environment
# associated with the yacc() call if none was provided.
# -----------------------------------------------------------------------------
def get_caller_module_dict(levels):
    f = sys._getframe(levels)
    ldict = f.f_globals.copy()
    if f.f_globals != f.f_locals:
        ldict.update(f.f_locals)
    return ldict
# -----------------------------------------------------------------------------
# parse_grammar()
#
# This takes a raw grammar rule string and parses it into production data
# -----------------------------------------------------------------------------
def parse_grammar(doc, file, line):
    grammar = []
    # Split the doc string into lines
    pstrings = doc.splitlines()
    lastp = None
    dline = line
    for ps in pstrings:
        dline += 1
        p = ps.split()
        if not p:
            continue
        try:
            if p[0] == '|':
                # This is a continuation of a previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
                prodname = lastp
                syms = p[1:]
            else:
                prodname = p[0]
                lastp = prodname
                syms = p[2:]
                assign = p[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))

            grammar.append((file, dline, prodname, syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))

    return grammar
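# Illustrative input (hypothetical rule): a docstring such as
#
#     expression : expression PLUS term
#                | term
#
# yields one (file, line, prodname, syms) tuple per alternative, e.g.
# (file, n, 'expression', ['expression', 'PLUS', 'term']) followed by
# (file, n+1, 'expression', ['term']).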
# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser including
# start symbol, error function, tokens, precedence list, action functions,
# filenames.
# -----------------------------------------------------------------------------
class ParserReflect(object):
    def __init__(self, pdict, log=None):
        self.pdict      = pdict
        self.start      = None
        self.error_func = None
        self.tokens     = None
        self.modules    = set()
        self.grammar    = []
        self.error      = False

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()
    # Validate all of the information
    def validate_all(self):
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_modules()
        return self.error
    # Compute a signature over the grammar
    def signature(self):
        parts = []
        try:
            if self.start:
                parts.append(self.start)
            if self.prec:
                parts.append(''.join([''.join(p) for p in self.prec]))
            if self.tokens:
                parts.append(' '.join(self.tokens))
            for f in self.pfuncs:
                if f[3]:
                    parts.append(f[3])
        except (TypeError, ValueError):
            pass
        return ''.join(parts)
    # -----------------------------------------------------------------------------
    # validate_modules()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file. Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work). Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------
    def validate_modules(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for module in self.modules:
            try:
                lines, linen = inspect.getsourcelines(module)
            except IOError:
                continue

            counthash = {}
            for linen, line in enumerate(lines):
                linen += 1
                m = fre.match(line)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        filename = inspect.getsourcefile(module)
                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
                                         filename, linen, name, prev)
    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start, string_types):
                self.log.error("'start' must be a string")
    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = True
                return

            eline = self.error_func.__code__.co_firstlineno
            efile = self.error_func.__code__.co_filename
            module = inspect.getmodule(self.error_func)
            self.modules.add(module)

            argcount = self.error_func.__code__.co_argcount - ismethod
            if argcount != 1:
                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
                self.error = True
    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get('tokens')
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = sorted(tokens)
    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = True
            return

        terminals = set()
        for n in self.tokens:
            if n in terminals:
                self.log.warning('Token %r multiply defined', n)
            terminals.add(n)
    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get('precedence')

    # Validate and parse the precedence map
    def validate_precedence(self):
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error('precedence must be a list or tuple')
                self.error = True
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error('Bad precedence table')
                    self.error = True
                    return

                if len(p) < 2:
                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
                    self.error = True
                    return
                assoc = p[0]
                if not isinstance(assoc, string_types):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
                    if not isinstance(term, string_types):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist
    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if not name.startswith('p_') or name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
                module = inspect.getmodule(item)
                p_functions.append((line, module, name, item.__doc__))

        # Sort all of the actions by line number; make sure to stringify
        # modules to make them sortable, since `line` may not uniquely sort all
        # of the module functions
        p_functions.sort(key=lambda p_function: (
            p_function[0],
            str(p_function[1]),
            p_function[2],
        ))
        self.pfuncs = p_functions
    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error('no rules of the form p_rulename are defined')
            self.error = True
            return

        for line, module, name, doc in self.pfuncs:
            file = inspect.getsourcefile(module)
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func.__code__.co_argcount > reqargs:
                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
                self.error = True
            elif func.__code__.co_argcount < reqargs:
                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
                self.error = True
            elif not func.__doc__:
                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                 file, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError as e:
                    self.log.error(str(e))
                    self.error = True

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.modules.add(module)

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n, v in self.pdict.items():
            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                continue
            if n.startswith('t_'):
                continue
            if n.startswith('p_') and n != 'p_error':
                self.log.warning('%r not defined as a function', n)
            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
                    (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
                if v.__doc__:
                    try:
                        doc = v.__doc__.split(' ')
                        if doc[1] == ':':
                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
                    except IndexError:
                        pass

        self.grammar = grammar
# -----------------------------------------------------------------------------
# yacc(module)
#
# Build a parser
# -----------------------------------------------------------------------------
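# Typical usage (illustrative sketch; 'calclex' and its lexer are
# placeholder names for a module defining tokens, p_* rules, and p_error):
#
#     import ply.yacc as yacc
#     parser = yacc.yacc()               # build (or reload) the tables
#     result = parser.parse('1+2*3', lexer=calclex.lexer)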
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
         outputdir=None, debuglog=None, errorlog=None, picklefile=None):

    if tabmodule is None:
        tabmodule = tab_module

    # Reference to the parsing method of the last built parser
    global parse

    # If pickling is enabled, table files are not created
    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)
    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        pdict = dict(_items)
        # If no __file__ or __package__ attributes are available, try to obtain them
        # from the __module__ instead
        if '__file__' not in pdict:
            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
        if '__package__' not in pdict and '__module__' in pdict:
            if hasattr(sys.modules[pdict['__module__']], '__package__'):
                pdict['__package__'] = sys.modules[pdict['__module__']].__package__
    else:
        pdict = get_caller_module_dict(2)
    if outputdir is None:
        # If no output directory is set, the location of the output files
        # is determined according to the following rules:
        #     - If tabmodule specifies a package, files go into that package directory
        #     - Otherwise, files go in the same directory as the specifying module
        if isinstance(tabmodule, types.ModuleType):
            srcfile = tabmodule.__file__
        else:
            if '.' not in tabmodule:
                srcfile = pdict['__file__']
            else:
                parts = tabmodule.split('.')
                pkgname = '.'.join(parts[:-1])
                exec('import %s' % pkgname)
                srcfile = getattr(sys.modules[pkgname], '__file__', '')
        outputdir = os.path.dirname(srcfile)
    # Determine if the module is part of a package.
    # If so, fix the tabmodule setting so that tables load correctly
    pkg = pdict.get('__package__')
    if pkg and isinstance(tabmodule, str):
        if '.' not in tabmodule:
            tabmodule = pkg + '.' + tabmodule

    # Set start symbol if it's specified directly using an argument
    if start is not None:
        pdict['start'] = start

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError('Unable to build parser')
    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr, pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception as e:
                errorlog.warning('There was a problem loading the table file: %r', e)
    except VersionError as e:
        errorlog.warning(str(e))
    except ImportError:
        pass
    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')
    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')
    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info('    %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))
    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')
    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug('Generating %s tables', method)

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)
    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)

        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)
    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
            if tabmodule in sys.modules:
                del sys.modules[tabmodule]
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser