sync master with latest vba changes
[ooovba.git] / toolkit / src2xml / source / srclexer.py
blob5a5a3319b0d1683ddd1437c4586d6ea7deb0acfe
1 import sys, os.path
2 from globals import *
3 import macroparser
class EOF(Exception):
    """Signals that a read position has moved past the end of the buffer."""

    def __init__(self):
        Exception.__init__(self)

    def __str__(self):
        # The original only defined str(), which Python never calls when
        # formatting an exception; __str__ is the hook that str() and
        # tracebacks actually use.
        return "end of file"

    def str(self):
        """Return the message text (kept for callers of the old method)."""
        return self.__str__()
class BOF(Exception):
    """Signals that a read position has moved before the start of the buffer."""

    def __init__(self):
        Exception.__init__(self)

    def __str__(self):
        # The original only defined str(), which Python never calls when
        # formatting an exception; __str__ is the hook that str() and
        # tracebacks actually use.
        return "beginning of file"

    def str(self):
        """Return the message text (kept for callers of the old method)."""
        return self.__str__()
def removeHeaderQuotes(orig):
    """Strip the delimiters from an include target.

    A string longer than two characters that is wrapped in double quotes
    ("...") or in angle brackets (<...>) is returned without its first and
    last character.  Anything else, including every string of length two
    or less, is returned unchanged.
    """
    if len(orig) > 2:
        delimiters = (orig[0], orig[-1])
        if delimiters == ('"', '"') or delimiters == ('<', '>'):
            return orig[1:-1]
    return orig
def dumpTokens(tokens, toError=False):
    """Pretty-print a token list, one statement per line.

    '{' and '<' open a scope and are printed on their own line; '}' and
    '>' close it.  ';' terminates the pending statement.  Every other
    non-empty token is appended to the pending statement followed by a
    single space.  Empty tokens are ignored.

    tokens  -- iterable of token strings.
    toError -- write to sys.stderr instead of sys.stdout when true.
    """
    scope = 0
    indent = " "
    line = ''
    pieces = []

    for token in tokens:
        # Exact membership, not `token in '{<'`: the substring test is also
        # true for the empty string, which made empty tokens open a scope.
        if token in ('{', '<'):
            if line:
                pieces.append(indent * scope + line + "\n")
                line = ''
            pieces.append(indent * scope + token + "\n")
            scope += 1

        elif token in ('}', '>'):
            if line:
                pieces.append(indent * scope + line + "\n")
                line = ''
            scope -= 1
            pieces.append(indent * scope + token)

        elif token == ';':
            if line:
                pieces.append(indent * scope + line + ";\n")
                line = ''
            else:
                pieces.append(";\n")

        elif token:
            line += token + ' '

    # Whatever is still pending is written without indentation.
    if line:
        pieces.append(line)
    pieces.append("\n")

    chars = ''.join(pieces)
    if toError:
        sys.stderr.write(chars)
    else:
        sys.stdout.write(chars)
class HeaderData(object):
    """Plain record holding the defines and the tokens of one header."""

    def __init__(self):
        # Both containers start out empty and are filled in by the owner.
        self.tokens = list()
        self.defines = dict()
class SrcLexer(object):
    """Lexicographical analyzer for .src format.

    The role of a lexer is to parse the source file and break it into
    appropriate tokens.  Such tokens are later passed to a parser to
    build the syntax tree.

    Typical use: construct with the file contents, call tokenize(), then
    read the results through getTokens() and getDefines().
    """

    # Header path -> HeaderData, shared by all lexer instances.
    headerCache = {}

    # States stored on the conditional-compilation stack.
    VISIBLE = 0         # current branch is emitted
    INVISIBLE_PRE = 1   # hidden, no branch of this conditional taken yet
    INVISIBLE_POST = 2  # hidden, a branch of this conditional was already taken

    def __init__(self, chars, filepath=None):
        """Create a lexer over the string `chars`.

        filepath -- path the text came from, if any; used to avoid a file
                    including itself and in diagnostics.
        """
        self.filepath = filepath
        self.parentLexer = None
        self.chars = chars
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied.
        self.headerDict = dict()
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list()
        self.expandHeaders = True
        self.inMacroDefine = False
        self.stopOnHeader = False

    def copyProperties(self, other):
        """Copy properties from another instance of SrcLexer."""

        # TODO: use parameters for this
        # headerDict is shared by reference; includeDirs is copied.
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init(self):
        """Reset all per-run lexing state."""
        self.firstNonBlank = ''
        self.token = ''
        self.tokens = []
        self.defines = {}
        self.visibilityStack = []

    def getTokens(self):
        """Return the list of tokens produced so far."""
        return self.tokens

    def getDefines(self):
        """Return the dictionary of macro definitions collected so far."""
        return self.defines

    def nextPos(self, i):
        """Return the next index after `i` that is not a carriage return.

        Raises EOF when the end of the buffer is reached.
        """
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            if c != "\r":
                return i

    def prevPos(self, i):
        """Return the closest index before `i` that is not a carriage return.

        Raises BOF when the start of the buffer is passed.
        """
        while True:
            i -= 1
            # A negative index does not raise IndexError in Python, it wraps
            # around to the end of the buffer, so test for it explicitly.
            if i < 0:
                raise BOF
            try:
                c = self.chars[i]
            except IndexError:
                raise BOF

            if c != "\r":
                return i

    def isCodeVisible(self):
        """Return True unless an enclosing conditional hides the code."""
        for item in self.visibilityStack:
            if item != SrcLexer.VISIBLE:
                return False
        return True

    def tokenize(self):
        """Lex the whole buffer, filling self.tokens and self.defines."""
        self.init()

        i = 0
        # The bound check only matters for an empty buffer; afterwards
        # nextPos() always hands back a valid index or raises EOF.
        while i < self.bufsize:
            c = self.chars[i]

            if self.firstNonBlank == '' and c not in (' ', "\n", "\t"):
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in (' ', "\t"):
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                self.token += c

            try:
                i = self.nextPos(i)
            except EOF:
                break

        # Flush whatever was still being accumulated at end of input.
        self.maybeAddToken()

        if not self.parentLexer and self.debug:
            progress("-"*68 + "\n")
            progress("All defines found in this translation unit:\n")
            for key in sorted(self.defines):
                progress("@ %s\n" % key)

    def dumpTokens(self, toError=False):
        """Print the token list through the module-level dumpTokens()."""
        dumpTokens(self.tokens, toError)

    def maybeAddToken(self):
        """Append the pending token, if any, and start a new one."""
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''

    #--------------------------------------------------------------------
    # character handlers
    #
    # Each handler receives the index of the character that triggered it
    # and returns the index of the last character it consumed.

    def blank(self, i):
        """Handle a space or tab: it terminates the pending token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i

    def pound(self, i):
        """Handle '#': process a preprocessor directive.

        Nothing happens when lexing inside a macro definition or when the
        '#' is not the first non-blank character of its line.
        """
        if self.inMacroDefine:
            return i

        if self.firstNonBlank != '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.
        i, command, buf = self._readDirective(i)
        self._applyDirective(command, buf)
        return i

    def _readDirective(self, i):
        """Read one '#<command> .....' line and return (i, command, buf).

        `buf` holds the text after the command.  A backslash directly
        followed by a newline joins the next line onto the directive.
        """
        command, buf = '', ''
        seenNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                # Step back so that the main loop sees the line break.
                i = self.prevPos(i)
                break
            elif c in (' ', "\t"):
                if not seenNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                if len(command) == 0:
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                seenNonBlank = True
                buf += c

        # A directive without arguments ends at the line break or at end
        # of input.  The original only covered the line-break case, so a
        # final '#endif' without a trailing newline was not recognised.
        if len(buf) > 0 and len(command) == 0:
            command = buf
        return i, command, buf

    def _pushVisibility(self, visible):
        """Open a new conditional whose first branch is shown or hidden."""
        if visible:
            self.visibilityStack.append(SrcLexer.VISIBLE)
        else:
            self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

    def _applyDirective(self, command, buf):
        """Carry out the preprocessor directive `command` with text `buf`.

        Raises ParseError for an elif/else/endif without an open
        conditional.  An unrecognised command is reported on stdout and
        terminates the process.
        """
        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            self._pushVisibility(buf.strip() in self.defines)
        elif command == 'ifndef':
            self._pushVisibility(buf.strip() not in self.defines)
        elif command == 'if':
            self._pushVisibility(self.evalCodeVisibility(buf))

        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError('')
            self.visibilityStack.pop()

        elif command in ('undef', 'error', 'pragma'):
            # Recognised but deliberately ignored.
            pass
        else:
            sys.stdout.write("'%s' '%s'\n" % (command, buf))
            sys.stdout.write("%s\n" % self.filepath)
            # NOTE(review): exit status 0 is kept from the original even
            # though this is an error path -- confirm before changing.
            sys.exit(0)

    def evalCodeVisibility(self, buf):
        """Evaluate the expression of an #if / #elif.

        Anything that cannot be evaluated counts as true.
        """
        # SECURITY: eval() runs text taken straight from the lexed file.
        # Only use this lexer on trusted sources.
        try:
            return eval(buf)
        except Exception:
            return True

    def handleMacroDefine(self, buf):
        """Parse the text of a #define and record the resulting macro."""
        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            self.defines[macro.name] = macro

    def handleMacroInclude(self, buf):
        """Process an #include: lex the header and import its defines.

        The header is searched for in self.includeDirs.  Headers that
        were already expanded are skipped, and headers present in the
        class-wide cache contribute their defines without being lexed
        again.
        """
        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers.  Bail out.
            if self.debug:
                progress("%s ignored\n" % headerSub)
            return

        headerPath = None
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress("%s found\n" % headerPath)

        if headerPath in self.headerDict:
            if self.debug:
                progress("%s already included\n" % headerPath)
            return

        if headerPath in SrcLexer.headerCache:
            if self.debug:
                progress("%s in cache\n" % headerPath)
            self.defines.update(SrcLexer.headerCache[headerPath].defines)
            return

        # Close the file explicitly instead of leaving it to the
        # garbage collector.
        headerFile = open(headerPath, 'r')
        try:
            chars = headerFile.read()
        finally:
            headerFile.close()

        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()

        headerDefines = mclexer.getDefines()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        hdrData.defines.update(headerDefines)

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in mclexer.headerDict.keys():
            self.headerDict[key] = True

        if self.debug:
            progress("defines found in header %s:\n" % headerSub)
            for key in headerDefines.keys():
                progress(" '%s'\n" % key)

        self.defines.update(headerDefines)

    def slash(self, i):
        """Handle '/': skip a comment or emit '/' as a token.

        For a line comment the returned index is the character before
        the line break, so that the main loop still sees the break.  For
        a block comment it is the index of the closing '/'.  An
        unterminated comment consumes the rest of the buffer.
        """
        if not self.isCodeVisible():
            return i

        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Parse line comment.
            i += 2
            while i < self.bufsize:
                if self.chars[i] in ("\n", "\r"):
                    return i - 1
                i += 1
            # End of input: the pending token is left alone so that
            # tokenize() can still flush it (the original dropped it).
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            # Parse block comment.
            i += 2
            start = i
            while i < self.bufsize:
                # `i > start` keeps the '*' of the opening '/*' from being
                # reused as the '*' of the closing '*/' (as in "/*/").
                if self.chars[i] == '/' and i > start and self.chars[i-1] == '*':
                    return i
                i += 1
        else:
            return self.anyToken(i, '/')

        return i

    def lineBreak(self, i):
        """Handle a newline: it terminates the pending token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i

    def doubleQuote(self, i):
        """Handle '"': emit the string literal, quotes included, as a token.

        Returns the index of the closing quote.  An unterminated literal
        is discarded and the buffer length is returned.
        """
        if not self.isCodeVisible():
            return i

        end = self.chars.find('"', i + 1)
        if end < 0:
            return self.bufsize

        self.tokens.append(self.chars[i:end+1])
        return end

    def anyToken(self, i, token):
        """Emit `token` as a token of its own, after the pending one."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i