Pin Chrome's shortcut to the Win10 Start menu on install and OS upgrade.
[chromium-blink-merge.git] / third_party / cython / src / Cython / Plex / Scanners.py
blob315742f309de1ad94f6cebbf6e485cd004d62f55
1 #=======================================================================
3 # Python Lexical Analyser
6 # Scanning an input stream
8 #=======================================================================
import cython
# Declare the module-level sentinel names as plain Python objects for Cython.
cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)

import Errors
from Regexps import BOL, EOL, EOF

# Unique sentinel used as the default for state.get() in the scanning loop,
# so that "no transition stored" can be told apart from any stored value.
NOT_FOUND = object()
18 class Scanner(object):
19 """
20 A Scanner is used to read tokens from a stream of characters
21 using the token set specified by a Plex.Lexicon.
23 Constructor:
25 Scanner(lexicon, stream, name = '', initial_pos = None)
27 See the docstring of the __init__ method for details.
29 Methods:
31 See the docstrings of the individual methods for more
32 information.
34 read() --> (value, text)
35 Reads the next lexical token from the stream.
37 position() --> (name, line, col)
38 Returns the position of the last token read using the
39 read() method.
41 begin(state_name)
42 Causes scanner to change state.
44 produce(value [, text])
45 Causes return of a token value to the caller of the
46 Scanner.
48 """
50 # lexicon = None # Lexicon
51 # stream = None # file-like object
52 # name = ''
53 # buffer = ''
54 # buf_start_pos = 0 # position in input of start of buffer
55 # next_pos = 0 # position in input of next char to read
56 # cur_pos = 0 # position in input of current char
57 # cur_line = 1 # line number of current char
58 # cur_line_start = 0 # position in input of start of current line
59 # start_pos = 0 # position in input of start of token
60 # start_line = 0 # line number of start of token
61 # start_col = 0 # position in line of start of token
62 # text = None # text of last token read
63 # initial_state = None # Node
64 # state_name = '' # Name of initial state
65 # queue = None # list of tokens to be returned
66 # trace = 0
def __init__(self, lexicon, stream, name = '', initial_pos = None):
    """
    Scanner(lexicon, stream, name = '', initial_pos = None)

    |lexicon| is a Plex.Lexicon instance specifying the lexical tokens
    to be recognised.

    |stream| can be a file object or anything which implements a
    compatible read() method.

    |name| is optional, and may be the name of the file being
    scanned or any other identifying string.

    |initial_pos| is optional.  When given, its element 1 becomes the
    starting line number and the negation of its element 2 becomes the
    starting line-start offset.
    """
    self.trace = 0

    # Input buffer and position bookkeeping.
    self.buffer = u''
    self.buf_start_pos = self.next_pos = self.cur_pos = 0
    self.cur_line = 1
    self.start_pos = self.start_line = self.start_col = 0
    self.text = self.state_name = None

    self.lexicon, self.stream, self.name = lexicon, stream, name
    self.queue = []
    self.initial_state = None
    # Enter the default (unnamed) state; needs self.lexicon to be set.
    self.begin('')

    # Positions are reset again after begin(), as in the original code.
    self.next_pos = self.cur_pos = self.cur_line_start = 0
    self.cur_char = BOL
    self.input_state = 1

    if initial_pos is None:
        return
    self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
def read(self):
    """
    Read the next lexical token from the stream and return a
    tuple (value, text), where |value| is the value associated with
    the token as specified by the Lexicon, and |text| is the actual
    string read from the stream. Returns (None, '') on end of file.
    """
    pending = self.queue
    # Keep scanning until some action (or end of file) has queued a token;
    # actions that return None and produce nothing are simply skipped.
    while not pending:
        self.text, action = self.scan_a_token()
        if action is not None:
            value = action.perform(self, self.text)
            if value is not None:
                self.produce(value)
            continue
        # No action means end of input: queue the None token, then
        # give the eof() hook a chance to run.
        self.produce(None)
        self.eof()
    return pending.pop(0)
def scan_a_token(self):
    """
    Read the next input sequence recognised by the machine
    and return (text, action). Returns ('', None) on end of
    file.

    Raises Errors.UnrecognizedInput when the machine recognises
    nothing and the input is not at end of file.
    """
    # Record where this token begins, for position() and buffer slicing.
    token_start = self.cur_pos
    self.start_pos = token_start
    self.start_line = self.cur_line
    self.start_col = token_start - self.cur_line_start

    action = self.run_machine_inlined()
    if action is None:
        if self.cur_pos == self.start_pos:
            # Nothing was consumed: step past a pending EOL marker and
            # check whether the input is simply exhausted.
            if self.cur_char is EOL:
                self.next_char()
            if self.cur_char is None or self.cur_char is EOF:
                return (u'', None)
        raise Errors.UnrecognizedInput(self, self.state_name)

    if self.trace:
        print("Scanner: read: Performing %s %d:%d" % (
            action, self.start_pos, self.cur_pos))
    # Positions are absolute; the buffer starts at buf_start_pos.
    base = self.buf_start_pos
    text = self.buffer[self.start_pos - base:self.cur_pos - base]
    return (text, action)
def run_machine_inlined(self):
    """
    Inlined version of run_machine for speed.

    Runs the state machine from self.initial_state over the input,
    beginning at the scanner's current position.  Every time a state
    whose 'action' entry is not None is reached, the current position
    is remembered.  When no further transition exists, the scanner is
    rewound to the most recently remembered position and that state's
    action is returned; if no such state was reached, None is returned
    and the position is left where the machine blocked.  In both cases
    the position variables are written back to self before returning.

    All scanner position attributes are copied into locals for the
    duration of the loop.
    """
    state = self.initial_state
    cur_pos = self.cur_pos
    cur_line = self.cur_line
    cur_line_start = self.cur_line_start
    cur_char = self.cur_char
    input_state = self.input_state
    next_pos = self.next_pos
    buffer = self.buffer
    buf_start_pos = self.buf_start_pos
    buf_len = len(buffer)
    # b_* hold the backup snapshot taken at the last state that had an action.
    b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
        None, 0, 0, 0, u'', 0, 0
    trace = self.trace
    while 1:
        if trace: #TRACE#
            print("State %d, %d/%d:%s -->" % ( #TRACE#
                state['number'], input_state, cur_pos, repr(cur_char))) #TRACE#
        # Begin inlined self.save_for_backup()
        #action = state.action #@slow
        action = state['action'] #@fast
        if action is not None:
            b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
                action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
        # End inlined self.save_for_backup()
        c = cur_char
        #new_state = state.new_state(c) #@slow
        # Look for an explicit transition on c; NOT_FOUND distinguishes
        # "no entry" from an entry whose value is falsy.
        new_state = state.get(c, NOT_FOUND) #@fast
        if new_state is NOT_FOUND: #@fast
            # The 'else' transition is only consulted when c is truthy.
            new_state = c and state.get('else') #@fast
        if new_state:
            if trace: #TRACE#
                print("State %d" % new_state['number']) #TRACE#
            state = new_state
            # Begin inlined: self.next_char()
            if input_state == 1:
                # Normal reading: fetch the next character.
                cur_pos = next_pos
                # Begin inlined: c = self.read_char()
                buf_index = next_pos - buf_start_pos
                if buf_index < buf_len:
                    c = buffer[buf_index]
                    next_pos = next_pos + 1
                else:
                    # Buffer exhausted: drop everything before the start
                    # of the current token and append a new chunk read
                    # from the stream.
                    discard = self.start_pos - buf_start_pos
                    data = self.stream.read(0x1000)
                    buffer = self.buffer[discard:] + data
                    self.buffer = buffer
                    buf_start_pos = buf_start_pos + discard
                    self.buf_start_pos = buf_start_pos
                    buf_len = len(buffer)
                    buf_index = buf_index - discard
                    if data:
                        c = buffer[buf_index]
                        next_pos = next_pos + 1
                    else:
                        # Nothing more to read from the stream.
                        c = u''
                # End inlined: c = self.read_char()
                if c == u'\n':
                    # Deliver EOL first; the newline itself follows
                    # in input state 2.
                    cur_char = EOL
                    input_state = 2
                elif not c:
                    # Out of input: deliver EOL, then EOF in state 4.
                    cur_char = EOL
                    input_state = 4
                else:
                    cur_char = c
            elif input_state == 2:
                # EOL has been delivered; now deliver the newline.
                cur_char = u'\n'
                input_state = 3
            elif input_state == 3:
                # Newline consumed: start a new line and deliver BOL.
                cur_line = cur_line + 1
                cur_line_start = cur_pos = next_pos
                cur_char = BOL
                input_state = 1
            elif input_state == 4:
                # Final EOL has been delivered; now deliver EOF.
                cur_char = EOF
                input_state = 5
            else: # input_state = 5
                # After EOF only the empty string is delivered.
                cur_char = u''
            # End inlined self.next_char()
        else: # not new_state
            if trace: #TRACE#
                print("blocked") #TRACE#
            # Begin inlined: action = self.back_up()
            if b_action is not None:
                # Rewind to the snapshot taken at the last state
                # that had an action.
                (action, cur_pos, cur_line, cur_line_start,
                    cur_char, input_state, next_pos) = \
                    (b_action, b_cur_pos, b_cur_line, b_cur_line_start,
                     b_cur_char, b_input_state, b_next_pos)
            else:
                action = None
            break # while 1
            # End inlined: action = self.back_up()
    # Publish the local position variables back onto the scanner.
    self.cur_pos = cur_pos
    self.cur_line = cur_line
    self.cur_line_start = cur_line_start
    self.cur_char = cur_char
    self.input_state = input_state
    self.next_pos = next_pos
    if trace: #TRACE#
        if action is not None: #TRACE#
            print("Doing %s" % action) #TRACE#
    return action
def next_char(self):
    """
    Advance the scanner by one input event, updating self.cur_char
    and self.input_state (and the position attributes where needed).

    The event delivered depends on the input state on entry:
      1 -- read a character; a newline or end of input is first
           reported as EOL
      2 -- deliver the newline that followed an EOL
      3 -- begin a new line and deliver BOL
      4 -- deliver EOF
      otherwise -- deliver the empty string
    """
    entry_state = self.input_state
    if self.trace:
        print("Scanner: next: %s [%d] %d" % (" "*20, entry_state, self.cur_pos))

    if entry_state == 2:
        self.cur_char = u'\n'
        self.input_state = 3
    elif entry_state == 3:
        self.cur_line += 1
        line_start = self.next_pos
        self.cur_line_start = line_start
        self.cur_pos = line_start
        self.cur_char = BOL
        self.input_state = 1
    elif entry_state == 4:
        self.cur_char = EOF
        self.input_state = 5
    elif entry_state == 1:
        self.cur_pos = self.next_pos
        # NOTE(review): read_char() is not defined in the visible part
        # of this class -- confirm it exists before relying on this path.
        ch = self.read_char()
        if ch == u'\n':
            self.cur_char = EOL
            self.input_state = 2
        elif not ch:
            self.cur_char = EOL
            self.input_state = 4
        else:
            self.cur_char = ch
    else: # input_state = 5
        self.cur_char = u''

    # The trace line reports the input state as it was on entry.
    if self.trace:
        print("--> [%d] %d %s" % (entry_state, self.cur_pos, repr(self.cur_char)))
def position(self):
    """
    Return a tuple (name, line, col) representing the location of
    the last token read using the read() method. |name| is the
    name that was provided to the Scanner constructor; |line|
    is the line number in the stream (1-based); |col| is the
    position within the line of the first character of the token
    (0-based).
    """
    source_name = self.name
    line, column = self.start_line, self.start_col
    return (source_name, line, column)
def get_position(self):
    """Python accessible wrapper around position(), only for error reporting.
    """
    location = self.position()
    return location
def begin(self, state_name):
    """Set the current state of the scanner to the named state."""
    # Ask the lexicon for the start node first, so state_name is only
    # recorded once that lookup has succeeded.
    start_node = self.lexicon.get_initial_state(state_name)
    self.initial_state = start_node
    self.state_name = state_name
def produce(self, value, text = None):
    """
    Called from an action procedure, causes |value| to be returned
    as the token value from read(). If |text| is supplied, it is
    returned in place of the scanned text.

    produce() can be called more than once during a single call to an action
    procedure, in which case the tokens are queued up and returned one
    at a time by subsequent calls to read(), until the queue is empty,
    whereupon scanning resumes.
    """
    # Only None selects the scanned text; an explicit '' is kept as given.
    token_text = self.text if text is None else text
    self.queue.append((value, token_text))
def eof(self):
    """
    Override this method if you want something to be done at
    end of file.

    read() calls this hook right after queueing the end-of-file
    token; this base implementation does nothing.
    """