1 # cython: infer_types=True, language_level=3, py2_import=True
10 cython
.declare(EncodedString
=object, any_string_prefix
=unicode, IDENT
=unicode,
11 print_function
=object)
13 from Cython
import Utils
14 from Cython
.Plex
.Scanners
import Scanner
15 from Cython
.Plex
.Errors
import UnrecognizedInput
16 from Errors
import error
17 from Lexicon
import any_string_prefix
, make_lexicon
, IDENT
18 from Future
import print_function
20 from StringEncoding
import EncodedString
24 scanner_debug_flags
= 0
25 scanner_dump_file
= None
32 lexicon
= make_lexicon()
35 #------------------------------------------------------------------
38 "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
39 "continue", "return", "raise", "import", "exec", "try",
40 "except", "finally", "while", "if", "elif", "else", "for",
41 "in", "assert", "and", "or", "not", "is", "in", "lambda",
42 "from", "yield", "with", "nonlocal",
45 pyx_reserved_words
= py_reserved_words
+ [
46 "include", "ctypedef", "cdef", "cpdef",
47 "cimport", "DEF", "IF", "ELIF", "ELSE"
52 def __init__(self
, name
):
54 self
.__name
__ = name
# for Plex tracing
def __call__(self, stream, text):
    """Forward *text* to the attribute of *stream* named by ``self.name``.

    The attribute is looked up with ``getattr`` and called with *text*;
    whatever that call returns is returned unchanged.
    """
    handler = getattr(stream, self.name)
    return handler(text)
59 #------------------------------------------------------------------
61 class CompileTimeScope(object):
63 def __init__(self
, outer
= None):
def declare(self, name, value):
    """Bind *name* to *value* in this scope's ``entries`` mapping.

    An existing binding for *name* is overwritten.
    """
    scope_entries = self.entries
    scope_entries[name] = value
def update(self, other):
    """Merge *other* into ``self.entries`` by calling ``entries.update``."""
    scope_entries = self.entries
    scope_entries.update(other)
def lookup_here(self, name):
    """Return the value stored under *name* in ``self.entries``.

    The mapping is indexed directly, so a missing name raises whatever
    the mapping raises (``KeyError`` for a plain dict).
    """
    scope_entries = self.entries
    return scope_entries[name]
def __contains__(self, name):
    """Return whether *name* is a key of ``self.entries``."""
    scope_entries = self.entries
    return name in scope_entries
79 def lookup(self
, name
):
81 return self
.lookup_here(name
)
85 return outer
.lookup(name
)
89 def initial_compile_time_env():
90 benv
= CompileTimeScope()
91 names
= ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
92 'UNAME_VERSION', 'UNAME_MACHINE')
93 for name
, value
in zip(names
, platform
.uname()):
94 benv
.declare(name
, value
)
96 import __builtin__
as builtins
100 names
= ('False', 'True',
101 'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
102 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
103 'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
104 'list', 'long', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
105 'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
106 'sum', 'tuple', 'xrange', 'zip')
110 benv
.declare(name
, getattr(builtins
, name
))
111 except AttributeError:
114 denv
= CompileTimeScope(benv
)
117 #------------------------------------------------------------------
119 class SourceDescriptor(object):
121 A SourceDescriptor should be considered immutable.
125 _escaped_description
= None
128 assert False # To catch all places where a descriptor is used directly as a filename
def set_file_type_from_name(self, filename):
    """Derive ``self._file_type`` from the extension of *filename*.

    The type is the extension without its leading dot when the extension
    is one of ``.pyx``, ``.pxd`` or ``.py``; any other extension
    (including none at all) falls back to ``'pyx'``.
    """
    ext = os.path.splitext(filename)[1]
    # A conditional expression replaces the ``cond and a or b`` idiom, which
    # silently picks the fallback whenever the middle operand is falsy.
    self._file_type = ext[1:] if ext in ('.pyx', '.pxd', '.py') else 'pyx'
def is_cython_file(self):
    """Return True when ``self._file_type`` is ``'pyx'`` or ``'pxd'``."""
    file_type = self._file_type
    return file_type in ('pyx', 'pxd')
def is_python_file(self):
    """Return True when ``self._file_type`` equals ``'py'``."""
    file_type = self._file_type
    return file_type == 'py'
def get_escaped_description(self):
    """Return an ASCII-only form of ``get_description()``, computed once.

    On the first call the description is encoded to ASCII with the
    ``'replace'`` error handler, decoded back, and stored in
    ``self._escaped_description``; later calls return the stored value.
    """
    cached = self._escaped_description
    if cached is not None:
        return cached
    ascii_bytes = self.get_description().encode('ASCII', 'replace')
    self._escaped_description = ascii_bytes.decode("ASCII")
    return self._escaped_description
146 def __gt__(self
, other
):
147 # this is only used to provide some sort of order
149 return self
._cmp
_name
> other
._cmp
_name
150 except AttributeError:
153 def __lt__(self
, other
):
154 # this is only used to provide some sort of order
156 return self
._cmp
_name
< other
._cmp
_name
157 except AttributeError:
160 def __le__(self
, other
):
161 # this is only used to provide some sort of order
163 return self
._cmp
_name
<= other
._cmp
_name
164 except AttributeError:
167 class FileSourceDescriptor(SourceDescriptor
):
169 Represents a code source. A code source is a more generic abstraction
170 for a "filename" (as sometimes the code doesn't come from a file).
171 Instances of code sources are passed to Scanner.__init__ as the
172 optional name argument and will be passed back when asking for
173 the position()-tuple.
175 def __init__(self
, filename
, path_description
=None):
176 filename
= Utils
.decode_filename(filename
)
177 self
.path_description
= path_description
or filename
178 self
.filename
= filename
179 self
.set_file_type_from_name(filename
)
180 self
._cmp
_name
= filename
183 def get_lines(self
, encoding
=None, error_handling
=None):
184 # we cache the lines only the second time this is called, in
185 # order to save memory when they are only used once
186 key
= (encoding
, error_handling
)
188 lines
= self
._lines
[key
]
189 if lines
is not None:
193 f
= Utils
.open_source_file(
194 self
.filename
, encoding
=encoding
,
195 error_handling
=error_handling
,
196 # newline normalisation is costly before Py2.6
197 require_normalised_newlines
=False)
202 if key
in self
._lines
:
203 self
._lines
[key
] = lines
205 # do not cache the first access, but remember that we
206 # already read it once
207 self
._lines
[key
] = None
def get_description(self):
    """Return the ``path_description`` stored on this descriptor."""
    description = self.path_description
    return description
213 def get_error_description(self
):
215 cwd
= Utils
.decode_filename(os
.getcwd() + os
.path
.sep
)
216 if path
.startswith(cwd
):
217 return path
[len(cwd
):]
220 def get_filenametable_entry(self
):
def __eq__(self, other):
    """Compare by ``filename``; only another FileSourceDescriptor can match."""
    if not isinstance(other, FileSourceDescriptor):
        return False
    return self.filename == other.filename
227 return hash(self
.filename
)
230 return "<FileSourceDescriptor:%s>" % self
.filename
232 class StringSourceDescriptor(SourceDescriptor
):
234 Instances of this class can be used instead of a filename if the
235 code originates from a string object.
239 def __init__(self
, name
, code
):
241 #self.set_file_type_from_name(name)
242 self
.codelines
= [x
+ "\n" for x
in code
.split("\n")]
243 self
._cmp
_name
= name
245 def get_lines(self
, encoding
=None, error_handling
=None):
247 return self
.codelines
249 return [ line
.encode(encoding
, error_handling
).decode(encoding
)
250 for line
in self
.codelines
]
252 def get_description(self
):
255 get_error_description
= get_description
def get_filenametable_entry(self):
    """Return the fixed filename-table entry ``"stringsource"``."""
    entry = "stringsource"
    return entry
262 # Do not hash on the name, an identical string source should be the
263 # same object (name is often defaulted in other places)
264 # return hash(self.name)
def __eq__(self, other):
    """Compare by ``name``; only another StringSourceDescriptor can match."""
    if not isinstance(other, StringSourceDescriptor):
        return False
    return self.name == other.name
270 return "<StringSourceDescriptor:%s>" % self
.name
272 #------------------------------------------------------------------
274 class PyrexScanner(Scanner
):
275 # context Context Compilation context
276 # included_files [string] Files included with 'include' statement
277 # compile_time_env dict Environment for conditional compilation
278 # compile_time_eval boolean In a true conditional compilation context
279 # compile_time_expr boolean In a compile-time expression context
281 def __init__(self
, file, filename
, parent_scanner
= None,
282 scope
= None, context
= None, source_encoding
=None, parse_comments
=True, initial_pos
=None):
283 Scanner
.__init
__(self
, get_lexicon(), file, filename
, initial_pos
)
285 self
.context
= parent_scanner
.context
286 self
.included_files
= parent_scanner
.included_files
287 self
.compile_time_env
= parent_scanner
.compile_time_env
288 self
.compile_time_eval
= parent_scanner
.compile_time_eval
289 self
.compile_time_expr
= parent_scanner
.compile_time_expr
291 self
.context
= context
292 self
.included_files
= scope
.included_files
293 self
.compile_time_env
= initial_compile_time_env()
294 self
.compile_time_eval
= 1
295 self
.compile_time_expr
= 0
296 if hasattr(context
.options
, 'compile_time_env') and \
297 context
.options
.compile_time_env
is not None:
298 self
.compile_time_env
.update(context
.options
.compile_time_env
)
299 self
.parse_comments
= parse_comments
300 self
.source_encoding
= source_encoding
301 if filename
.is_python_file():
302 self
.in_python_file
= True
303 self
.keywords
= set(py_reserved_words
)
305 self
.in_python_file
= False
306 self
.keywords
= set(pyx_reserved_words
)
307 self
.trace
= trace_scanner
308 self
.indentation_stack
= [0]
309 self
.indentation_char
= None
310 self
.bracket_nesting_level
= 0
def commentline(self, text):
    """Produce a ``'commentline'`` token for *text* if ``parse_comments`` is set.

    When ``self.parse_comments`` is false nothing is produced.
    """
    if not self.parse_comments:
        return
    self.produce('commentline', text)
def current_level(self):
    """Return the last (innermost) entry of ``self.indentation_stack``."""
    stack = self.indentation_stack
    return stack[-1]
322 def open_bracket_action(self
, text
):
323 self
.bracket_nesting_level
= self
.bracket_nesting_level
+ 1
326 def close_bracket_action(self
, text
):
327 self
.bracket_nesting_level
= self
.bracket_nesting_level
- 1
330 def newline_action(self
, text
):
331 if self
.bracket_nesting_level
== 0:
333 self
.produce('NEWLINE', '')
342 def begin_string_action(self
, text
):
343 while text
[:1] in any_string_prefix
:
345 self
.begin(self
.string_states
[text
])
346 self
.produce('BEGIN_STRING')
348 def end_string_action(self
, text
):
350 self
.produce('END_STRING')
def unclosed_string_action(self, text):
    """Handle a string literal that was never closed.

    First runs ``end_string_action`` on *text*, then reports
    "Unclosed string literal" through ``self.error``.
    """
    self.end_string_action(text)
    message = "Unclosed string literal"
    self.error(message)
356 def indentation_action(self
, text
):
358 # Indentation within brackets should be ignored.
359 #if self.bracket_nesting_level > 0:
361 # Check that tabs and spaces are being used consistently.
364 #print "Scanner.indentation_action: indent with", repr(c) ###
365 if self
.indentation_char
is None:
366 self
.indentation_char
= c
367 #print "Scanner.indentation_action: setting indent_char to", repr(c)
369 if self
.indentation_char
!= c
:
370 self
.error("Mixed use of tabs and spaces")
371 if text
.replace(c
, "") != "":
372 self
.error("Mixed use of tabs and spaces")
373 # Figure out how many indents/dedents to do
374 current_level
= self
.current_level()
375 new_level
= len(text
)
376 #print "Changing indent level from", current_level, "to", new_level ###
377 if new_level
== current_level
:
379 elif new_level
> current_level
:
380 #print "...pushing level", new_level ###
381 self
.indentation_stack
.append(new_level
)
382 self
.produce('INDENT', '')
384 while new_level
< self
.current_level():
385 #print "...popping level", self.indentation_stack[-1] ###
386 self
.indentation_stack
.pop()
387 self
.produce('DEDENT', '')
388 #print "...current level now", self.current_level() ###
389 if new_level
!= self
.current_level():
390 self
.error("Inconsistent indentation")
def eof_action(self, text):
    """Close all open indentation levels, then produce the EOF token.

    For every entry on ``self.indentation_stack`` beyond the first, a
    ``'DEDENT'`` token is produced and the entry is popped; finally an
    ``'EOF'`` token is produced. *text* is not used.
    """
    # The stack is re-read on every pass, so the loop ends once only the
    # base level is left.
    while len(self.indentation_stack) >= 2:
        self.produce('DEDENT', '')
        self.indentation_stack.pop()
    self.produce('EOF', '')
400 sy
, systring
= self
.read()
401 except UnrecognizedInput
:
402 self
.error("Unrecognized character")
404 if systring
in self
.keywords
:
405 if systring
== u
'print' and print_function
in self
.context
.future_directives
:
406 self
.keywords
.discard('print')
407 systring
= EncodedString(systring
)
408 elif systring
== u
'exec' and self
.context
.language_level
>= 3:
409 self
.keywords
.discard('exec')
410 systring
= EncodedString(systring
)
414 systring
= EncodedString(systring
)
416 self
.systring
= systring
417 if False: # debug_scanner:
418 _
, line
, col
= self
.position()
419 if not self
.systring
or self
.sy
== self
.systring
:
422 t
= "%s %s" % (self
.sy
, self
.systring
)
423 print("--- %3d %2d %s" % (line
, col
, t
))
426 saved
= self
.sy
, self
.systring
428 next
= self
.sy
, self
.systring
430 self
.sy
, self
.systring
= saved
433 def put_back(self
, sy
, systring
):
434 self
.unread(self
.sy
, self
.systring
)
436 self
.systring
= systring
def unread(self, token, value):
    """Push ``(token, value)`` onto the front of ``self.queue``."""
    # This method should be added to Plex
    pending = (token, value)
    self.queue.insert(0, pending)
442 def error(self
, message
, pos
= None, fatal
= True):
444 pos
= self
.position()
445 if self
.sy
== 'INDENT':
446 err
= error(pos
, "Possible inconsistent indentation")
447 err
= error(pos
, message
)
450 def expect(self
, what
, message
= None):
454 self
.expected(what
, message
)
456 def expect_keyword(self
, what
, message
= None):
457 if self
.sy
== IDENT
and self
.systring
== what
:
460 self
.expected(what
, message
)
462 def expected(self
, what
, message
= None):
467 found
= self
.systring
470 self
.error("Expected '%s', found '%s'" % (what
, found
))
def expect_indent(self):
    """Call ``self.expect`` for an ``'INDENT'`` token with a fixed message."""
    message = "Expected an increase in indentation level"
    self.expect('INDENT', message)
def expect_dedent(self):
    """Call ``self.expect`` for a ``'DEDENT'`` token with a fixed message."""
    message = "Expected a decrease in indentation level"
    self.expect('DEDENT', message)
480 def expect_newline(self
, message
= "Expected a newline"):
481 # Expect either a newline or end of file
483 self
.expect('NEWLINE', message
)