Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / cython / src / Cython / Compiler / Scanning.py
blob93905e09792bdec62616bbf1f0252e524fd9a4ff
1 # cython: infer_types=True, language_level=3, py2_import=True
3 # Cython Scanner
6 import os
7 import platform
9 import cython
10 cython.declare(EncodedString=object, any_string_prefix=unicode, IDENT=unicode,
11 print_function=object)
13 from Cython import Utils
14 from Cython.Plex.Scanners import Scanner
15 from Cython.Plex.Errors import UnrecognizedInput
16 from Errors import error
17 from Lexicon import any_string_prefix, make_lexicon, IDENT
18 from Future import print_function
20 from StringEncoding import EncodedString
# Module-level switches for scanner diagnostics.  In the part of the file
# visible here only trace_scanner is read: PyrexScanner.__init__ copies it
# into self.trace.  The other three are just initialised.
debug_scanner = 0
trace_scanner = 0
scanner_debug_flags = 0
scanner_dump_file = None
# Module-wide cache for the lexicon; filled in by get_lexicon() on first use.
lexicon = None


def get_lexicon():
    """Return the shared lexicon, building it with make_lexicon() on first use.

    The result is stored in the module global ``lexicon`` so later calls
    return the same object.
    """
    global lexicon
    # Compare against None explicitly so the cache does not depend on the
    # truthiness of whatever object make_lexicon() returns.
    if lexicon is None:
        lexicon = make_lexicon()
    return lexicon
35 #------------------------------------------------------------------
# Words treated as keywords when scanning plain Python source.
# "print" and "exec" are listed here; PyrexScanner.next() removes them from a
# scanner's own keyword set when the print_function future directive or
# language level 3 applies.
py_reserved_words = [
    "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
    "continue", "return", "raise", "import", "exec", "try",
    "except", "finally", "while", "if", "elif", "else", "for",
    "in", "assert", "and", "or", "not", "is", "lambda",
    "from", "yield", "with",
]

# Cython source reserves everything Python does plus the Cython-only keywords.
pyx_reserved_words = py_reserved_words + [
    "include", "ctypedef", "cdef", "cpdef",
    "cimport", "DEF", "IF", "ELIF", "ELSE",
]
class Method(object):
    """Callable that forwards to a named method of the stream it is called with.

    ``Method("foo")(stream, text)`` is equivalent to ``stream.foo(text)``.
    """

    def __init__(self, name):
        self.name = name
        self.__name__ = name  # for Plex tracing

    def __call__(self, stream, text):
        """Look up ``self.name`` on *stream* and call it with *text*."""
        return getattr(stream, self.name)(text)
59 #------------------------------------------------------------------
class CompileTimeScope(object):
    """A name -> value mapping with an optional enclosing (outer) scope.

    ``lookup`` searches this scope first and then walks outwards; the other
    accessors only look at this scope's own entries.
    """

    def __init__(self, outer = None):
        self.entries = {}
        self.outer = outer

    def declare(self, name, value):
        """Bind *name* to *value* in this scope."""
        self.entries[name] = value

    def update(self, other):
        """Merge the bindings of *other* (anything dict.update accepts) into this scope."""
        self.entries.update(other)

    def lookup_here(self, name):
        """Return the value bound to *name* in this scope; raise KeyError if absent."""
        return self.entries[name]

    def __contains__(self, name):
        # Only this scope's own entries are consulted, not the outer scopes.
        return name in self.entries

    def lookup(self, name):
        """Return the value of *name* from this scope or the nearest outer scope.

        Raises KeyError when no scope in the chain defines *name*.
        """
        try:
            return self.lookup_here(name)
        except KeyError:
            outer = self.outer
            if outer is not None:
                return outer.lookup(name)
            else:
                raise
def initial_compile_time_env():
    """Build the default environment for compile-time expressions.

    Returns an empty CompileTimeScope whose outer scope holds the
    UNAME_* values taken from platform.uname() and a fixed selection of
    builtins (those that exist in the running interpreter).
    """
    benv = CompileTimeScope()
    names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
             'UNAME_VERSION', 'UNAME_MACHINE')
    # zip() stops at the shorter sequence, so only the first five uname
    # fields are declared.
    for name, value in zip(names, platform.uname()):
        benv.declare(name, value)
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins

    names = ('False', 'True',
             'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
             'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
             'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
             'list', 'long', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
             'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
             'sum', 'tuple', 'xrange', 'zip')

    for name in names:
        try:
            benv.declare(name, getattr(builtins, name))
        except AttributeError:
            # ignore, likely Py3
            pass
    # User-level declarations go into a fresh inner scope so they never
    # overwrite the builtin environment itself.
    denv = CompileTimeScope(benv)
    return denv
117 #------------------------------------------------------------------
class SourceDescriptor(object):
    """
    A SourceDescriptor should be considered immutable.
    """
    # File kind without the leading dot: 'pyx', 'pxd' or 'py'.
    _file_type = 'pyx'

    # Cache for get_escaped_description(); None until first computed.
    _escaped_description = None
    # Key used by the ordering methods below.
    _cmp_name = ''

    def __str__(self):
        assert False # To catch all places where a descriptor is used directly as a filename

    def set_file_type_from_name(self, filename):
        """Derive the file type from *filename*'s extension.

        Anything other than .pyx, .pxd or .py is treated as 'pyx'.
        """
        name, ext = os.path.splitext(filename)
        if ext in ('.pyx', '.pxd', '.py'):
            self._file_type = ext[1:]
        else:
            self._file_type = 'pyx'

    def is_cython_file(self):
        return self._file_type in ('pyx', 'pxd')

    def is_python_file(self):
        return self._file_type == 'py'

    def get_escaped_description(self):
        """Return get_description() with non-ASCII characters replaced.

        The result is computed once and cached on the instance.
        get_description() is not defined on this class itself.
        """
        if self._escaped_description is None:
            self._escaped_description = \
                self.get_description().encode('ASCII', 'replace').decode("ASCII")
        return self._escaped_description

    def __gt__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name > other._cmp_name
        except AttributeError:
            return False

    def __lt__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name < other._cmp_name
        except AttributeError:
            return False

    def __le__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name <= other._cmp_name
        except AttributeError:
            return False
class FileSourceDescriptor(SourceDescriptor):
    """
    Represents a code source. A code source is a more generic abstraction
    for a "filename" (as sometimes the code doesn't come from a file).
    Instances of code sources are passed to Scanner.__init__ as the
    optional name argument and will be passed back when asking for
    the position()-tuple.
    """
    def __init__(self, filename, path_description=None):
        filename = Utils.decode_filename(filename)
        # Fall back to the (decoded) filename when no description is given.
        self.path_description = path_description or filename
        self.filename = filename
        self.set_file_type_from_name(filename)
        self._cmp_name = filename
        # Maps (encoding, error_handling) -> list of lines, or None as a
        # "read once already" marker (see get_lines).
        self._lines = {}

    def get_lines(self, encoding=None, error_handling=None):
        """Return the file's lines as a list, read with the given settings."""
        # we cache the lines only the second time this is called, in
        # order to save memory when they are only used once
        key = (encoding, error_handling)
        try:
            lines = self._lines[key]
            if lines is not None:
                return lines
        except KeyError:
            pass
        f = Utils.open_source_file(
            self.filename, encoding=encoding,
            error_handling=error_handling,
            # newline normalisation is costly before Py2.6
            require_normalised_newlines=False)
        try:
            lines = list(f)
        finally:
            f.close()
        if key in self._lines:
            self._lines[key] = lines
        else:
            # do not cache the first access, but remember that we
            # already read it once
            self._lines[key] = None
        return lines

    def get_description(self):
        return self.path_description

    def get_error_description(self):
        """Return the filename, relative to the current directory if it lies below it."""
        path = self.filename
        cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
        if path.startswith(cwd):
            return path[len(cwd):]
        return path

    def get_filenametable_entry(self):
        return self.filename

    def __eq__(self, other):
        return isinstance(other, FileSourceDescriptor) and self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename
class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filename if the
    code originates from a string object.
    """
    filename = None

    def __init__(self, name, code):
        self.name = name
        #self.set_file_type_from_name(name)
        # Every piece produced by split() gets a trailing newline, including
        # the last one.
        self.codelines = [x + "\n" for x in code.split("\n")]
        self._cmp_name = name

    def get_lines(self, encoding=None, error_handling=None):
        """Return the source lines, optionally round-tripped through *encoding*.

        Without an encoding the stored lines are returned as they are.
        With an encoding each line is encoded and decoded again, using
        *error_handling* as the encoder's error mode when one is given.
        """
        if not encoding:
            return self.codelines
        if error_handling is None:
            # encode() rejects None as its errors argument, so leave it out
            # and let the encoder use its default error mode.
            return [line.encode(encoding).decode(encoding)
                    for line in self.codelines]
        return [line.encode(encoding, error_handling).decode(encoding)
                for line in self.codelines]

    def get_description(self):
        return self.name

    get_error_description = get_description

    def get_filenametable_entry(self):
        return "stringsource"

    def __hash__(self):
        # Do not hash on the name, an identical string source should be the
        # same object (name is often defaulted in other places)
        # NOTE: __eq__ below compares by name while the hash is per object.
        return id(self)

    def __eq__(self, other):
        return isinstance(other, StringSourceDescriptor) and self.name == other.name

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name
272 #------------------------------------------------------------------
class PyrexScanner(Scanner):
    #  context            Context  Compilation context
    #  included_files     [string] Files included with 'include' statement
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context

    def __init__(self, file, filename, parent_scanner = None,
                 scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
        """Set up scanning of *file* and read the first token.

        With a *parent_scanner* the compilation context and compile-time
        state are shared with it; otherwise they are initialised from
        *context* and *scope*.  *filename* is a source descriptor (its
        is_python_file() decides which keyword set is used).
        """
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
        if parent_scanner:
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
        else:
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
            # Apply compile-time definitions supplied through the options.
            if hasattr(context.options, 'compile_time_env') and \
                    context.options.compile_time_env is not None:
                self.compile_time_env.update(context.options.compile_time_env)
        self.parse_comments = parse_comments
        self.source_encoding = source_encoding
        # Each scanner gets its own keyword set because next() may remove
        # 'print' / 'exec' from it while scanning.
        if filename.is_python_file():
            self.in_python_file = True
            self.keywords = set(py_reserved_words)
        else:
            self.in_python_file = False
            self.keywords = set(pyx_reserved_words)
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0
        self.begin('INDENT')
        self.sy = ''
        self.next()

    def commentline(self, text):
        """Produce a 'commentline' token, but only when comments are being parsed."""
        if self.parse_comments:
            self.produce('commentline', text)

    def current_level(self):
        """Return the innermost (most recent) indentation width."""
        return self.indentation_stack[-1]

    def open_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level + 1
        return text

    def close_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level - 1
        return text

    def newline_action(self, text):
        # Newlines inside brackets are not significant.
        if self.bracket_nesting_level == 0:
            self.begin('INDENT')
            self.produce('NEWLINE', '')

    # Opening quote sequence -> scanner state used while inside that string.
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }

    def begin_string_action(self, text):
        """Enter the string state matching the quotes in *text*.

        Leading string-prefix characters are stripped first so that only
        the quote sequence is left for the state lookup.
        """
        while text[:1] in any_string_prefix:
            text = text[1:]
        self.begin(self.string_states[text])
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        """Turn the leading whitespace *text* into INDENT/DEDENT tokens.

        Reports an error for mixed tabs and spaces and for a dedent that
        does not return to a previously seen indentation level.
        """
        self.begin('')
        # Indentation within brackets should be ignored.
        #if self.bracket_nesting_level > 0:
        #    return
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            if self.indentation_char is None:
                self.indentation_char = c
            else:
                if self.indentation_char != c:
                    self.error("Mixed use of tabs and spaces")
            if text.replace(c, "") != "":
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        if new_level == current_level:
            return
        elif new_level > current_level:
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        else:
            while new_level < self.current_level():
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            if new_level != self.current_level():
                self.error("Inconsistent indentation")

    def eof_action(self, text):
        """Close every still-open indentation level, then produce EOF."""
        while len(self.indentation_stack) > 1:
            self.produce('DEDENT', '')
            self.indentation_stack.pop()
        self.produce('EOF', '')

    def next(self):
        """Read the next token into self.sy / self.systring.

        Identifiers that are keywords get the keyword itself as their
        token type; all other identifiers have their text wrapped in
        EncodedString.
        """
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
        if sy == IDENT:
            if systring in self.keywords:
                if systring == u'print' and print_function in self.context.future_directives:
                    # 'print' is an ordinary name from here on.
                    self.keywords.discard('print')
                    systring = EncodedString(systring)
                elif systring == u'exec' and self.context.language_level >= 3:
                    # 'exec' is an ordinary name from here on.
                    self.keywords.discard('exec')
                    systring = EncodedString(systring)
                else:
                    sy = systring
            else:
                systring = EncodedString(systring)
        self.sy = sy
        self.systring = systring
        if False: # debug_scanner:
            _, line, col = self.position()
            if not self.systring or self.sy == self.systring:
                t = self.sy
            else:
                t = "%s %s" % (self.sy, self.systring)
            print("--- %3d %2d %s" % (line, col, t))

    def peek(self):
        """Return the (sy, systring) pair of the following token without consuming it."""
        saved = self.sy, self.systring
        self.next()
        upcoming = self.sy, self.systring
        self.unread(*upcoming)
        self.sy, self.systring = saved
        return upcoming

    def put_back(self, sy, systring):
        """Push the current token back and make (sy, systring) the current token."""
        self.unread(self.sy, self.systring)
        self.sy = sy
        self.systring = systring

    def unread(self, token, value):
        # This method should be added to Plex
        self.queue.insert(0, (token, value))

    def error(self, message, pos = None, fatal = True):
        """Report *message* at *pos* (default: the current position).

        The error object returned by the module-level error() is raised
        when *fatal* is true.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            # Reported in addition to the actual message below.
            err = error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        if fatal: raise err

    def expect(self, what, message = None):
        """Consume the current token if its type is *what*, else report an error."""
        if self.sy == what:
            self.next()
        else:
            self.expected(what, message)

    def expect_keyword(self, what, message = None):
        """Consume the current token if it is the identifier *what*, else report an error."""
        if self.sy == IDENT and self.systring == what:
            self.next()
        else:
            self.expected(what, message)

    def expected(self, what, message = None):
        """Report *message*, or a generic "Expected ..., found ..." error."""
        if message:
            self.error(message)
        else:
            if self.sy == IDENT:
                found = self.systring
            else:
                found = self.sy
            self.error("Expected '%s', found '%s'" % (what, found))

    def expect_indent(self):
        self.expect('INDENT',
                    "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT',
                    "Expected a decrease in indentation level")

    def expect_newline(self, message = "Expected a newline"):
        # Expect either a newline or end of file
        if self.sy != 'EOF':
            self.expect('NEWLINE', message)