pgworksheet_yvesf/pgw/Lexical.py

   1 #!/usr/bin/env python
   2 # -*- coding: latin-1; -*-
   3 #
   4 # PgWorksheet - PostgreSQL Front End
   5 # http://pgworksheet.projects.postgresql.org/
   6 #
   7 # Copyright © 2004-2005 Henri Michelon & CML http://www.e-cml.org/
   8 #
   9 # This program is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU General Public License
  11 # as published by the Free Software Foundation; either version 2
  12 # of the License, or (at your option) any later version.
  13 #
  14 # This program is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 # GNU General Public License for more details (read LICENSE.txt).
  18 #
  19 # You should have received a copy of the GNU General Public License
  20 # along with this program; if not, write to the Free Software
  21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  22 #
  23 # $Id: Lexical.py,v 1.6 2005/10/25 17:31:24 hmichelon Exp $
  24 #
  25
  26 # http://www.postgresql.org/docs/8.0/static/sql-syntax.html
  27
  28 # basic characters sets
  29 SPACES = [ ' ', '\t', '\n' ]
  30 DIGITS = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
  31 NUMERIC = DIGITS + [ 'e', '.', '+', '-' ]
  32 OPERATOR_CHARS = [ '+', '-', '*', '/', '<', '>', '=', '~', '!', \
  33                    '@', '#', '%', '^', '&', '|', '`', '?' ]
  34 SPECIAL_CHARS = [ '(', ')', '[', ']', ',', ';', ':', '*', '.' ]
  35 OPERATORS = OPERATOR_CHARS + SPECIAL_CHARS
  36
  37 # not the first character of an identifier
  38 NOT_IDENT_START = SPECIAL_CHARS + OPERATOR_CHARS + DIGITS + [ '$' ]
  39 # not a character of an identifier
  40 NOT_IDENT_CHAR = SPECIAL_CHARS + OPERATOR_CHARS + SPACES + [ "'" ]
  41 # not a character of a dollar quoted string
  42 NOT_DOLLAR_QUOTED = [ '$' ] + SPACES
  43
  44
  45 class Token:
  46
  47   def __init__(self, token, start_iter, end_iter, value=None):
  48     self.token = token
  49     self.start_iter = start_iter
  50     self.end_iter = end_iter
  51     self.value = value
  52
  53
  54 class Eob:
  55   """End of Buffer Exception"""
  56
  57
  58 class Lexical:
  59   """Simplified lexical analyser"""
  60
  61   def analyse(self, buffer, start, end):
  62     """Run the lexical and syntaxical analysers then
  63     apply the syntax highlight to the buffer"""
  64     self.buffer = buffer
  65     self.current = start.copy()
  66     self.tokens = [];
  67     try:
  68       self.lexical_analyser(end.copy())
  69     except Eob:
  70       pass
  71     return self.tokens
  72
  73
  74   def next_char(self):
  75     """Returns the next character to analyse"""
  76     if (self.current.is_end()):
  77       raise Eob()
  78     c = self.current.get_char()
  79     self.current.forward_char()
  80     return c
  81
  82
  def skip_spaces(self, c):
    """Return the first character, starting with `c`, that is not in SPACES.

    Characters are consumed with next_char() for as long as the current one
    is a space, a tab or a newline.
    """
    while True:
      if c not in SPACES:
        return c
      c = self.next_char()
  88
  89
  def string(self):
    """Single quoted strings

    Called right after the opening quote has been consumed. Appends one
    'string' token starting on the opening quote and returns the first
    character that follows the closing quote. A doubled quote ('') and any
    character preceded by a backslash are part of the string.

    If the buffer ends first, the token extends to the end of the buffer
    and Eob is re-raised.
    """
    start = self.current.copy()
    start.backward_char()  # step back onto the opening quote
    try:
      c = self.next_char()
      while (True):
        if (c == '\\'):
          # a backslash escapes exactly one character: swallow it unseen, so
          # that \' does not close the string while the quote in \\' does
          self.next_char()
        elif (c == "'"):
          c = self.next_char()
          if (c != "'"):
            break  # that quote was the closing one
          # otherwise '' stands for a single quote inside the string
        c = self.next_char()
    except Eob:
      end = self.current.copy()
      self.tokens.append(Token('string', start, end))
      raise
    end = self.current.copy()
    end.backward_char()  # leave out the character that follows the string
    self.tokens.append(Token('string', start, end))
    return c
 112
 113
 114   def dollar_string(self):
 115     """Dollar-quoted strings"""
 116     # first bound
 117     start = self.current.copy()
 118     start.backward_char()
 119     c = self.next_char()
 120     string_tag = ''
 121     try:
 122       while (c not in NOT_DOLLAR_QUOTED):
 123         string_tag = string_tag + c
 124         c = self.next_char()
 125     except Eob:
 126       end = self.current.copy()
 127       self.tokens.append(Token('identifier', start, end, string_tag.upper()))
 128       raise
 129     end = self.current.copy()
 130     end.backward_char()
 131     if (c != '$'):
 132       self.tokens.append(Token('identifier', start, end, string_tag.upper()))
 133       return c
 134     self.tokens.append(Token('dollarquote', start, end, string_tag.upper()))
 135
 136     # string content
 137     start = self.current.copy()
 138     try:
 139       c = self.next_char()
 140     except Eob:
 141       end = self.current.copy()
 142       self.tokens.append(Token('identifier', start, end, string_tag.upper()))
 143       raise
 144     try:
 145       while (True):
 146         if (c == '$'):
 147           string_end = self.current.copy()
 148           c = self.next_char()
 149           s = ''
 150           while (c not in NOT_DOLLAR_QUOTED):
 151             s = s + c
 152             c = self.next_char()
 153           if (s == string_tag):
 154             string_end.backward_char()
 155             self.tokens.append(Token('string', start, string_end))
 156             end = self.current.copy()
 157             end.backward_char()
 158             self.tokens.append(Token('dollarquote', start, end, s.upper()))
 159             return c
 160         else:
 161           c = self.next_char()
 162     except Eob:
 163       end = self.current.copy()
 164       self.tokens.append(Token('string', start, end))
 165       raise
 166     end = self.current.copy()
 167     end.backward_char()
 168     self.tokens.append(Token('string', start, end))
 169     return c
 170
 171   def bit_string_constant(self, start):
 172     """Binary and Hexadecimal numeric constants using strings"""
 173     c = self.next_char()
 174     if (c == "'"):
 175       c = self.next_char()
 176       start = self.current.copy()
 177       start.backward_char()
 178       start.backward_char()
 179       start.backward_char()
 180       while (c != "'"):
 181         c = self.next_char()
 182       end = self.current.copy()
 183       self.tokens.append(Token('numeric_constant', start, end))
 184       return self.next_char()
 185     else:
 186       return self.identifier(c, start)
 187
 188
 189   def identifier(self, c, ident = ''):
 190     """An identifier, keyword, type name, etc..."""
 191     start = self.current.copy()
 192     for i in range(0, len(ident) + 1):
 193       start.backward_char()
 194     try:
 195       while (c not in NOT_IDENT_CHAR):
 196         ident = ident + c
 197         c = self.next_char()
 198     except Eob:
 199       end = self.current.copy()
 200       self.tokens.append(Token('identifier', start, end, ident.upper()))
 201       raise
 202     end = self.current.copy()
 203     end.backward_char()
 204     self.tokens.append(Token('identifier', start, end, ident.upper()))
 205     return c
 206
 207
 208   def numeric(self, c):
 209     """A numeric constant"""
 210     start = self.current.copy()
 211     start.backward_char()
 212     try:
 213       while (c in NUMERIC):
 214         c = self.next_char()
 215     except Eob:
 216       end = self.current.copy()
 217       self.tokens.append(Token('numeric_constant', start, end))
 218       raise
 219     end = self.current.copy()
 220     end.backward_char()
 221     self.tokens.append(Token('numeric_constant', start, end))
 222     return c
 223
 224
 225   def simple_comment(self):
 226     """One line comment using --"""
 227     start = self.current.copy()
 228     start.backward_char()
 229     start.backward_char()
 230     c = self.next_char()
 231     try:
 232       while (c != '\n'):
 233         c = self.next_char()
 234     except Eob:
 235       end = self.current.copy()
 236       self.tokens.append(Token('comment', start, end))
 237       raise
 238     end = self.current.copy()
 239     self.tokens.append(Token('comment', start, end))
 240
 241
 242   def comment(self):
 243     """Multi lines comments using /* */"""
 244     start = self.current.copy()
 245     start.backward_char()
 246     start.backward_char()
 247     c = self.next_char()
 248     prev = None
 249     nested = 0
 250     try:
 251       while (True):
 252         if (c == '*'):
 253           c = self.next_char()
 254           if (prev == '/'):
 255             nested = nested + 1
 256             continue
 257           if (c == '/'):
 258             if (nested == 0):
 259               c = self.next_char()
 260               break
 261             else:
 262               nested = nested - 1
 263           else:
 264             prev = c
 265             continue
 266         prev = c
 267         c = self.next_char()
 268     except Eob:
 269       end = self.current.copy()
 270       self.tokens.append(Token('comment', start, end))
 271       raise
 272     end = self.current.copy()
 273     end.backward_char()
 274     self.tokens.append(Token('comment', start, end))
 275     return c
 276
 277
 278   def psql(self):
 279     """A PgSQL Command"""
 280     start = self.current.copy()
 281     start.backward_char()
 282     c = self.next_char()
 283     cmd = '\\'
 284     try:
 285       while (c != '\n') and (c != ';'):
 286         cmd = cmd + c
 287         c = self.next_char()
 288     except:
 289       end = self.current.copy()
 290       self.tokens.append(Token('psql', start, end, cmd))
 291       raise
 292     end = self.current.copy()
 293     self.tokens.append(Token('psql', start, end, cmd))
 294
 295
 296   def lexical_analyser(self, fin):
 297     """A simplified lexical analyser"""
 298     c = self.next_char()
 299     while (self.current.compare(fin) <= 0):
 300       c = self.skip_spaces(c)
 301       # Multi lines comments
 302       if (c == '/'):
 303         c = self.next_char()
 304         if (c == '*'):
 305           c = self.comment()
 306           continue
 307         else:
 308           self.current.backward_char()
 309       # One line comments
 310       elif (c == '-'):
 311         c = self.next_char()
 312         if (c == '-'):
 313           self.simple_comment()
 314         else:
 315           self.current.backward_char()
 316       # psql commands
 317       elif (c == '\\'):
 318         self.psql()
 319       # numeric
 320       elif (c in DIGITS):
 321         c = self.numeric(c)
 322         continue
 323       # bit strings
 324       elif (c == 'B') or (c == 'b') or (c == 'H') or (c == 'h'):
 325         c = self.bit_string_constant(c)
 326         continue
 327       # strings
 328       elif (c == "'"):
 329         c = self.string()
 330         continue
 331       # dollar-quoted strings
 332       elif (c == '$'):
 333         c = self.dollar_string()
 334         continue
 335       # numeric
 336       elif (c == '.'):
 337         c = self.next_char()
 338         if (c in DIGITS):
 339           self.current.backward_char()
 340           c = self.numeric(self.current.get_char())
 341           continue
 342       # quoted identifiers
 343       elif (c == '"'):
 344         c = self.next_char()
 345         while (c != '"'):
 346           c = self.next_char()
 347       # operators
 348       elif (c in OPERATORS):
 349         start = self.current.copy()
 350         start.backward_char()
 351         end = self.current.copy()
 352         self.tokens.append(Token('operator', start, end, c))
 353       # everything else
 354       elif (c not in NOT_IDENT_START):
 355         c = self.identifier(c)
 356         continue
 357       c = self.next_char()