#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

""" Lexer for PPAPI IDL """

#
# IDL Lexer
#
# The lexer uses the PLY lex library to build a tokenizer which understands
# WebIDL tokens.
#
# WebIDL, and WebIDL regular expressions, can be found at:
#   http://dev.w3.org/2006/webapi/WebIDL/
# PLY can be found at:
#   http://www.dabeaz.com/ply/
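#
# Example (illustrative, not part of the original header): lexing a fragment
# such as
#   interface Foo { 1 0x2A "hi" };
# produces a stream of (type, value) tokens roughly like
#   INTERFACE/'interface', SYMBOL/'Foo', '{', INT/'1', HEX/'0x2A',
#   STRING/'hi', '}', ';'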

import os.path
import re
import sys

#
# Try to load the ply module; if it is not found, assume it lives in the
# third_party directory, relative to ppapi.
#
try:
  from ply import lex
except ImportError:
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex

from idl_option import GetOption, Option, ParseOptions


Option('output', 'Generate output.')


#
# IDL Lexer
#
class IDLLexer(object):
  # 'tokens' is a value required by lex which specifies the complete list
  # of valid token types.
  tokens = [
    # Symbol and keyword types
      'COMMENT',
      'DESCRIBE',
      'ENUM',
      'LABEL',
      'SYMBOL',
      'INLINE',
      'INTERFACE',
      'STRUCT',
      'TYPEDEF',

    # Extra WebIDL keywords
      'CALLBACK',
      'DICTIONARY',
      'OPTIONAL',
      'READONLY',
      'STATIC',

    # Invented for apps use
      'NAMESPACE',

    # Data types
      'FLOAT',
      'OCT',
      'INT',
      'HEX',
      'STRING',

    # Operators
      'LSHIFT',
      'RSHIFT'
  ]

  # 'keywords' is a map of string to token type.  All SYMBOL tokens are
  # matched against keywords to determine if the token is actually a keyword.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum' : 'ENUM',
    'label' : 'LABEL',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',

    'callback' : 'CALLBACK',
    'dictionary' : 'DICTIONARY',
    'optional' : 'OPTIONAL',
    'static' : 'STATIC',
    'namespace' : 'NAMESPACE',
  }

  # 'literals' is a value expected by lex which specifies a list of valid
  # literal tokens, meaning the token type and token value are identical.
  literals = '"*.(){}[],;:=+-/~|&^?'

  # Token definitions
  #
  # Lex assumes any value or function in the form of 't_<TYPE>' represents a
  # regular expression where a match will emit a token of type <TYPE>.  In the
  # case of a function, the function is called when a match is made.  These
  # definitions come from WebIDL.
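  #
  # For example, the string rule t_LSHIFT below matches '<<' and emits a token
  # of type LSHIFT, while function rules such as t_STRING both match their
  # pattern and post-process the token before returning it.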

  # 't_ignore' is a special match of items to ignore
  t_ignore = ' \t'

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
  t_INT = r'-?[0-9]+[uU]?'
  t_OCT = r'-?0[0-7]+'
  t_HEX = r'-?0[Xx][0-9A-Fa-f]+'
  t_LSHIFT = r'<<'
  t_RSHIFT = r'>>'

  # A line ending '\n'; we use this to increment the line number.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # We do not process escapes in the IDL strings.  Strings are exclusively
  # used for attributes, and not used as typical 'C' constants.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment:  /* xxx */ or //
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    self.AddLines(t.value.count('\n'))
    return t

  # Return a "preprocessor" inline block
  def t_INLINE(self, t):
    r'\#inline (.|\n)*?\#endinl.*'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.
  def t_KEYWORD_SYMBOL(self, t):
    r'_?[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols.
    t.type = self.keywords.get(t.value, 'SYMBOL')

    # We strip the leading underscore so that a symbol can share its value
    # with a keyword (e.g. a dictionary named 'interface' can be written
    # '_interface').
    if t.value[0] == '_':
      t.value = t.value[1:]
    return t

  def t_ANY_error(self, t):
    msg = "Unrecognized input"
    line = self.lexobj.lineno

    # If that line has not been accounted for, then we must have hit
    # EoF, so compute the beginning of the line that caused the problem.
    if line >= len(self.index):
      # Find the offset in the line of the first word causing the issue
      word = t.value.split()[0]
      offs = self.lines[line - 1].find(word)
      # Add the computed line's starting position
      self.index.append(self.lexobj.lexpos - offs)
      msg = "Unexpected EoF reached after"

    pos = self.lexobj.lexpos - self.index[line]
    file = self.lexobj.filename
    out = self.ErrorMessage(file, line, pos, msg)
    sys.stderr.write(out + '\n')
    self.lex_errors += 1

  def AddLines(self, count):
    # Set the lexer position for the beginning of the next line.  In the case
    # of multiple lines, tokens cannot exist on any of the lines except the
    # last one, so the recorded values for previous lines are unused.  We still
    # fill the array, however, to make sure the line count is correct.
    self.lexobj.lineno += count
    for i in range(count):
      self.index.append(self.lexobj.lexpos)

  def FileLineMsg(self, file, line, msg):
    if file: return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  def SourceLine(self, file, line, pos):
    caret = '\t^'.expandtabs(pos)
    # We decrement the line number since the array is 0 based while the
    # line numbers are 1 based.
    return "%s\n%s" % (self.lines[line - 1], caret)

  def ErrorMessage(self, file, line, pos, msg):
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  def SetData(self, filename, data):
    # Start with line 1, not zero
    self.lexobj.lineno = 1
    self.lexobj.filename = filename
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)
    self.lex_errors = 0

  def __init__(self):
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
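

# Illustrative sketch, not part of the original tool: collect (type, value)
# pairs for a made-up IDL fragment to show how the lexer behaves.  Keywords
# come back with their own token types, and a leading underscore lets a
# keyword be used as a plain SYMBOL (t_KEYWORD_SYMBOL strips the underscore).
def ExampleTypedTokens():
  lexer = IDLLexer()
  lexer.SetData('example', 'interface _interface { 42 0x2A "hi" }')
  pairs = []
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    pairs.append((t.type, t.value))
  # Expected (illustrative):
  #   [('INTERFACE', 'interface'), ('SYMBOL', 'interface'), ('{', '{'),
  #    ('INT', '42'), ('HEX', '0x2A'), ('STRING', 'hi'), ('}', '}')]
  return pairs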


#
# FilesToTokens
#
# From a set of source file names, generate a list of tokens.
#
def FilesToTokens(filenames, verbose=False):
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    data = open(filename).read()
    lexer.SetData(filename, data)
    if verbose: sys.stdout.write('  Loaded %s...\n' % filename)
    while 1:
      t = lexer.lexobj.token()
      if t is None: break
      outlist.append(t)
  return outlist
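
# Illustrative only; the filenames below are made up for this sketch:
#   tokens = FilesToTokens(['ppb_example.idl', 'ppp_example.idl'], verbose=True)
#   sys.stdout.write('%d tokens\n' % len(tokens))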


def TokensFromText(text):
  lexer = IDLLexer()
  lexer.SetData('unknown', text)
  outlist = []
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t.value)
  return outlist


#
# TextToTokens
#
# From a block of text, generate a list of token values.
#
def TextToTokens(source):
  lexer = IDLLexer()
  outlist = []
  lexer.SetData('AUTO', source)
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    outlist.append(t.value)
  return outlist


#
# TestSame
#
# From a set of token values, generate a new source text by joining the
# values with newlines.  The new source is then tokenized and compared
# against the old set.
#
def TestSame(values1):
  # Recreate the source from the tokens.  We use newlines instead of spaces
  # since the '//' and #inline regexes are line sensitive.
  text = '\n'.join(values1)
  values2 = TextToTokens(text)

  count1 = len(values1)
  count2 = len(values2)
  if count1 != count2:
    print "Size mismatch original %d vs %d\n" % (count1, count2)
    if count1 > count2: count1 = count2

  for i in range(count1):
    if values1[i] != values2[i]:
      print "%d >>%s<< >>%s<<" % (i, values1[i], values2[i])

  if GetOption('output'):
    sys.stdout.write('Generating original.txt and tokenized.txt\n')
    open('original.txt', 'w').write(text)
    open('tokenized.txt', 'w').write('\n'.join(values2))

  if values1 == values2:
    sys.stdout.write('Same: Pass\n')
    return 0

  print "****************\n%s\n%s***************\n" % (text, '\n'.join(values2))
  sys.stdout.write('Same: Failed\n')
  return -1
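
# Illustrative round-trip check, not part of the original tool: tokenize a
# made-up fragment, then verify that re-lexing the joined token values yields
# the same sequence.
def ExampleRoundTrip():
  values = TextToTokens('interface Foo { 1 2.0 "bar" }')
  return TestSame(values)  # prints 'Same: Pass' and returns 0 on success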


#
# TestExpect
#
# From a list of token pairs, verify that the type of the second token in
# each pair matches the value of the first, so that the input:
#   INT 123 FLOAT 1.1
# generates a passing test, where the first token is the SYMBOL 'INT',
# the second token is the INT 123, the third token is the SYMBOL 'FLOAT'
# and the fourth is the FLOAT 1.1, etc...
def TestExpect(tokens):
  count = len(tokens)
  index = 0
  errors = 0
  while index < count:
    type = tokens[index].value
    token = tokens[index + 1]
    index += 2

    if type != token.type:
      sys.stderr.write('Mismatch:  Expected %s, but got %s = %s.\n' %
                       (type, token.type, token.value))
      errors += 1

  if not errors:
    sys.stdout.write('Expect: Pass\n')
    return 0

  sys.stdout.write('Expect: Failed\n')
  return -1
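
# Illustrative sketch, not part of the original tool: build a token list in
# the format TestExpect checks, using a made-up expectation string.
def ExampleExpect():
  lexer = IDLLexer()
  lexer.SetData('example', 'INT 123 FLOAT 1.1 HEX 0xFF')
  tokens = []
  while 1:
    t = lexer.lexobj.token()
    if t is None: break
    tokens.append(t)
  return TestExpect(tokens)  # prints 'Expect: Pass' and returns 0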


def Main(args):
  filenames = ParseOptions(args)

  try:
    tokens = FilesToTokens(filenames, GetOption('verbose'))
    values = [tok.value for tok in tokens]
    if GetOption('output'): sys.stdout.write(' <> '.join(values) + '\n')
    if GetOption('test'):
      if TestSame(values):
        return -1
      if TestExpect(tokens):
        return -1
    return 0

  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1
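
# Example invocation (illustrative; assumes idl_option exposes the 'test',
# 'verbose' and 'output' options as command-line flags):
#   python idl_lexer.py --test --verbose some_api.idl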

if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))