toolkit/src2xml/source/srclexer.py

   1 import sys, os.path
   2 from globals import *
   3 import macroparser
   4
   5 class EOF(Exception):
   6     def __init__ (self):
   7         pass
   8
   9     def str (self):
  10         return "end of file"
  11
  12 class BOF(Exception):
  13     def __init__ (self):
  14         pass
  15
  16     def str (self):
  17         return "beginning of file"
  18
  19
  20 def removeHeaderQuotes (orig):
  21     if len(orig) <= 2:
  22         return orig
  23     elif orig[0] == orig[-1] == '"':
  24         return orig[1:-1]
  25     elif orig[0] == '<' and orig[-1] == '>':
  26         return orig[1:-1]
  27
  28     return orig
  29
  30
  31 def dumpTokens (tokens, toError=False):
  32
  33     scope = 0
  34     indent = "    "
  35     line = ''
  36     chars = ''
  37
  38     for token in tokens:
  39         if token in '{<':
  40             if len(line) > 0:
  41                 chars += indent*scope + line + "\n"
  42                 line = ''
  43             chars += indent*scope + token + "\n"
  44             scope += 1
  45
  46         elif token in '}>':
  47             if len(line) > 0:
  48                 chars += indent*scope + line + "\n"
  49                 line = ''
  50             scope -= 1
  51             chars += indent*scope + token
  52
  53         elif token == ';':
  54             if len(line) > 0:
  55                 chars += indent*scope + line + ";\n"
  56                 line = ''
  57             else:
  58                 chars += ";\n"
  59         elif len(token) > 0:
  60             line += token + ' '
  61
  62     if len(line) > 0:
  63         chars += line
  64     chars += "\n"
  65     if toError:
  66         sys.stderr.write(chars)
  67     else:
  68         sys.stdout.write(chars)
  69
  70
  71 class HeaderData(object):
  72     def __init__ (self):
  73         self.defines = {}
  74         self.tokens = []
  75
  76
  77 class SrcLexer(object):
  78     """Lexicographical analyzer for .src format.
  79
  80 The role of a lexer is to parse the source file and break it into
  81 appropriate tokens.  Such tokens are later passed to a parser to
  82 build the syntax tree.
  83 """
  84     headerCache = {}
  85
  86     VISIBLE = 0
  87     INVISIBLE_PRE = 1
  88     INVISIBLE_POST = 2
  89
  90     def __init__ (self, chars, filepath = None):
  91         self.filepath = filepath
  92         self.parentLexer = None
  93         self.chars = chars
  94         self.bufsize = len(self.chars)
  95
  96         # TODO: use parameters for this
  97         # Properties that can be copied.
  98         self.headerDict = dict ()
  99         self.debug = False
 100         self.debugMacro = False
 101         self.includeDirs = list ()
 102         self.expandHeaders = True
 103         self.inMacroDefine = False
 104         self.stopOnHeader = False
 105
 106     def copyProperties (self, other):
 107         """Copy properties from another instance of SrcLexer."""
 108
 109         # TODO: use parameters for this
 110         self.headerDict = other.headerDict
 111         self.debug = other.debug
 112         self.debugMacro = other.debugMacro
 113         self.includeDirs = other.includeDirs[:]
 114         self.expandHeaders = other.expandHeaders
 115         self.inMacroDefine = other.inMacroDefine
 116         self.stopOnHeader = other.stopOnHeader
 117
 118     def init (self):
 119         self.firstNonBlank = ''
 120         self.token = ''
 121         self.tokens = []
 122         self.defines = {}
 123         self.visibilityStack = []
 124
 125     def getTokens (self):
 126         return self.tokens
 127
 128     def getDefines (self):
 129         return self.defines
 130
 131     def nextPos (self, i):
 132         while True:
 133             i += 1
 134             try:
 135                 c = self.chars[i]
 136             except IndexError:
 137                 raise EOF
 138
 139             if ord(c) in [0x0D]:
 140                 continue
 141             break
 142         return i
 143
 144     def prevPos (self, i):
 145         while True:
 146             i -= 1
 147             try:
 148                 c = self.chars[i]
 149             except IndexError:
 150                 raise BOF
 151
 152             if ord(c) in [0x0D]:
 153                 continue
 154             break
 155         return i
 156
 157     def isCodeVisible (self):
 158         if len(self.visibilityStack) == 0:
 159             return True
 160         for item in self.visibilityStack:
 161             if item != SrcLexer.VISIBLE:
 162                 return False
 163         return True
 164
 165     def tokenize (self):
 166         self.init()
 167
 168         i = 0
 169         while True:
 170             c = self.chars[i]
 171
 172             if self.firstNonBlank == '' and not c in [' ', "\n", "\t"]:
 173                 # Store the first non-blank in a line.
 174                 self.firstNonBlank = c
 175             elif c == "\n":
 176                 self.firstNonBlank = ''
 177
 178             if c == '#':
 179                 i = self.pound(i)
 180             elif c == '/':
 181                 i = self.slash(i)
 182             elif c == "\n":
 183                 i = self.lineBreak(i)
 184             elif c == '"':
 185                 i = self.doubleQuote(i)
 186             elif c in [' ', "\t"]:
 187                 i = self.blank(i)
 188             elif c in ";()[]{}<>,=+-*":
 189                 # Any outstanding single-character token.
 190                 i = self.anyToken(i, c)
 191             elif self.isCodeVisible():
 192                 self.token += c
 193
 194             try:
 195                 i = self.nextPos(i)
 196             except EOF:
 197                 break
 198
 199         if len(self.token):
 200             self.tokens.append(self.token)
 201
 202         if not self.parentLexer and self.debug:
 203             progress ("-"*68 + "\n")
 204             progress ("All defines found in this translation unit:\n")
 205             keys = self.defines.keys()
 206             keys.sort()
 207             for key in keys:
 208                 progress ("@ %s\n"%key)
 209
 210     def dumpTokens (self, toError=False):
 211         dumpTokens(self.tokens, toError)
 212
 213
 214     def maybeAddToken (self):
 215         if len(self.token) > 0:
 216             self.tokens.append(self.token)
 217             self.token = ''
 218
 219
 220     #--------------------------------------------------------------------
 221     # character handlers
 222
 223     def blank (self, i):
 224         if not self.isCodeVisible():
 225             return i
 226
 227         self.maybeAddToken()
 228         return i
 229
 230
 231     def pound (self, i):
 232
 233         if self.inMacroDefine:
 234             return i
 235
 236         if not self.firstNonBlank == '#':
 237             return i
 238
 239         self.maybeAddToken()
 240         # We are in preprocessing mode.
 241
 242         # Get the macro command name '#<command> .....'
 243
 244         command, define, buf = '', '', ''
 245         firstNonBlank = False
 246         while True:
 247             try:
 248                 i = self.nextPos(i)
 249                 c = self.chars[i]
 250                 if c == '\\' and self.chars[self.nextPos(i)] == "\n":
 251                     i = self.nextPos(i)
 252                     continue
 253             except EOF:
 254                 break
 255
 256             if c == "\n":
 257                 if len(buf) > 0 and len(command) == 0:
 258                     command = buf
 259                 i = self.prevPos(i)
 260                 break
 261             elif c in [' ', "\t"]:
 262                 if not firstNonBlank:
 263                     # Ignore any leading blanks after the '#'.
 264                     continue
 265
 266                 if len(command) == 0:
 267                     command = buf
 268                     buf = ''
 269                 else:
 270                     buf += ' '
 271             elif c == '(':
 272                 if len(buf) > 0 and len(command) == 0:
 273                     command = buf
 274                 buf += c
 275             else:
 276                 if not firstNonBlank:
 277                     firstNonBlank = True
 278                 buf += c
 279
 280         if command == 'define':
 281             self.handleMacroDefine(buf)
 282         elif command == 'include':
 283             self.handleMacroInclude(buf)
 284         elif command == 'ifdef':
 285             defineName = buf.strip()
 286             if self.defines.has_key(defineName):
 287                 self.visibilityStack.append(SrcLexer.VISIBLE)
 288             else:
 289                 self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
 290
 291         elif command == 'ifndef':
 292             defineName = buf.strip()
 293             if self.defines.has_key(defineName):
 294                 self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
 295             else:
 296                 self.visibilityStack.append(SrcLexer.VISIBLE)
 297
 298         elif command == 'if':
 299             if self.evalCodeVisibility(buf):
 300                 self.visibilityStack.append(SrcLexer.VISIBLE)
 301             else:
 302                 self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
 303
 304         elif command == 'elif':
 305             if len(self.visibilityStack) == 0:
 306                 raise ParseError ('')
 307
 308             if self.visibilityStack[-1] == SrcLexer.VISIBLE:
 309                 self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
 310             elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
 311                 # Evaluate only if the current visibility is false.
 312                 if self.evalCodeVisibility(buf):
 313                     self.visibilityStack[-1] = SrcLexer.VISIBLE
 314
 315         elif command == 'else':
 316             if len(self.visibilityStack) == 0:
 317                 raise ParseError ('')
 318
 319             if self.visibilityStack[-1] == SrcLexer.VISIBLE:
 320                 self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
 321             if self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
 322                 self.visibilityStack[-1] = SrcLexer.VISIBLE
 323
 324         elif command == 'endif':
 325             if len(self.visibilityStack) == 0:
 326                 raise ParseError ('')
 327             self.visibilityStack.pop()
 328
 329         elif command == 'undef':
 330             pass
 331         elif command in ['error', 'pragma']:
 332             pass
 333         else:
 334             print "'%s' '%s'"%(command, buf)
 335             print self.filepath
 336             sys.exit(0)
 337
 338         return i
 339
 340
 341     def evalCodeVisibility (self, buf):
 342         try:
 343             return eval(buf)
 344         except:
 345             return True
 346
 347     def handleMacroDefine (self, buf):
 348
 349         mparser = macroparser.MacroParser(buf)
 350         mparser.debug = self.debugMacro
 351         mparser.parse()
 352         macro = mparser.getMacro()
 353         if macro:
 354             self.defines[macro.name] = macro
 355
 356     def handleMacroInclude (self, buf):
 357
 358         # Strip excess string if any.
 359         pos = buf.find(' ')
 360         if pos >= 0:
 361             buf = buf[:pos]
 362         headerSub = removeHeaderQuotes(buf)
 363
 364         if not self.expandHeaders:
 365             # We don't want to expand headers.  Bail out.
 366             if self.debug:
 367                 progress ("%s ignored\n"%headerSub)
 368             return
 369
 370         defines = {}
 371         headerPath = None
 372         for includeDir in self.includeDirs:
 373             hpath = includeDir + '/' + headerSub
 374             if os.path.isfile(hpath) and hpath != self.filepath:
 375                 headerPath = hpath
 376                 break
 377
 378         if not headerPath:
 379             error("included header file " + headerSub + " not found\n", self.stopOnHeader)
 380             return
 381
 382         if self.debug:
 383             progress ("%s found\n"%headerPath)
 384
 385         if headerPath in self.headerDict:
 386             if self.debug:
 387                 progress ("%s already included\n"%headerPath)
 388             return
 389
 390         if SrcLexer.headerCache.has_key(headerPath):
 391             if self.debug:
 392                 progress ("%s in cache\n"%headerPath)
 393             for key in SrcLexer.headerCache[headerPath].defines.keys():
 394                 self.defines[key] = SrcLexer.headerCache[headerPath].defines[key]
 395             return
 396
 397         chars = open(headerPath, 'r').read()
 398         mclexer = SrcLexer(chars, headerPath)
 399         mclexer.copyProperties(self)
 400         mclexer.parentLexer = self
 401         mclexer.tokenize()
 402         hdrData = HeaderData()
 403         hdrData.tokens = mclexer.getTokens()
 404         headerDefines = mclexer.getDefines()
 405         for key in headerDefines.keys():
 406             defines[key] = headerDefines[key]
 407             hdrData.defines[key] = headerDefines[key]
 408
 409         self.headerDict[headerPath] = True
 410         SrcLexer.headerCache[headerPath] = hdrData
 411
 412         # Update the list of headers that have already been expaneded.
 413         for key in mclexer.headerDict.keys():
 414             self.headerDict[key] = True
 415
 416         if self.debug:
 417             progress ("defines found in header %s:\n"%headerSub)
 418             for key in defines.keys():
 419                 progress ("  '%s'\n"%key)
 420
 421         for key in defines.keys():
 422             self.defines[key] = defines[key]
 423
 424
 425     def slash (self, i):
 426         if not self.isCodeVisible():
 427             return i
 428
 429         if i < self.bufsize - 1 and self.chars[i+1] == '/':
 430             # Parse line comment.
 431             line = ''
 432             i += 2
 433             while i < self.bufsize:
 434                 c = self.chars[i]
 435                 if ord(c) in [0x0A, 0x0D]:
 436                     return i - 1
 437                 line += c
 438                 i += 1
 439             self.token = ''
 440         elif i < self.bufsize - 1 and self.chars[i+1] == '*':
 441             comment = ''
 442             i += 2
 443             while i < self.bufsize:
 444                 c = self.chars[i]
 445                 if c == '/' and self.chars[i-1] == '*':
 446                     return i
 447                 comment += c
 448                 i += 1
 449         else:
 450             return self.anyToken(i, '/')
 451
 452         return i
 453
 454
 455     def lineBreak (self, i):
 456         if not self.isCodeVisible():
 457             return i
 458
 459         self.maybeAddToken()
 460
 461         return i
 462
 463
 464     def doubleQuote (self, i):
 465         if not self.isCodeVisible():
 466             return i
 467
 468         literal = ''
 469         i += 1
 470         while i < self.bufsize:
 471             c = self.chars[i]
 472             if c == '"':
 473                 self.tokens.append('"'+literal+'"')
 474                 break
 475             literal += c
 476             i += 1
 477
 478         return i
 479
 480
 481     def anyToken (self, i, token):
 482         if not self.isCodeVisible():
 483             return i
 484
 485         self.maybeAddToken()
 486         self.token = token
 487         self.maybeAddToken()
 488         return i