2 # -*- coding: latin-1; -*-
4 # PgWorksheet - PostgreSQL Front End
5 # http://pgworksheet.projects.postgresql.org/
7 # Copyright © 2004-2008 Henri Michelon & CML http://www.e-cml.org/
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License
11 # as published by the Free Software Foundation; either version 2
12 # of the License, or (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details (read LICENSE.txt).
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 # $Id: Lexical.py,v 1.9 2008/03/12 20:26:23 hmichelon Exp $
26 # http://www.postgresql.org/docs/8.0/static/sql-syntax.html
# Basic character sets used by the lexer.
# They are kept as plain lists because they are combined with `+` below.
SPACES = [' ', '\t', '\n']
DIGITS = list('0123456789')
# Characters that may continue a numeric constant once it has started.
# PostgreSQL accepts the exponent marker in either case (1e5 and 1E5),
# so both 'e' and 'E' are listed.
NUMERIC = DIGITS + ['e', 'E', '.', '+', '-']
OPERATOR_CHARS = [
    '+', '-', '*', '/', '<', '>', '=', '~', '!',
    '@', '#', '%', '^', '&', '|', '`', '?',
]
SPECIAL_CHARS = ['(', ')', '[', ']', ',', ';', ':', '*', '.']
# Every character that is emitted as an 'operator' token.
OPERATORS = OPERATOR_CHARS + SPECIAL_CHARS

# Characters that cannot be the first character of an identifier.
NOT_IDENT_START = SPECIAL_CHARS + OPERATOR_CHARS + DIGITS + ['$']
# Characters that terminate an identifier.
NOT_IDENT_CHAR = SPECIAL_CHARS + OPERATOR_CHARS + SPACES + ["'"]
# Characters that terminate the tag of a dollar-quoted string.
NOT_DOLLAR_QUOTED = ['$'] + SPACES
47 def __init__(self
, token
, start_iter
, end_iter
, value
=None):
49 self
.start_iter
= start_iter
50 self
.end_iter
= end_iter
55 """End of Buffer Exception"""
59 """Simplified lexical analyser"""
61 def analyse(self
, buffer, start
, end
):
62 """Run the lexical analyser"""
64 self
.current
= start
.copy()
67 self
.lexical_analyser(end
.copy())
74 """Returns the next character to analyse"""
75 if (self
.current
.is_end()):
77 c
= self
.current
.get_char()
78 self
.current
.forward_char()
82 def skip_spaces(self
, c
):
83 """Skips everything that looks like a space/tab/etc..."""
90 """Single quoted strings"""
91 start
= self
.current
.copy()
97 if ((c
== "'") and (prev
!= '\\')): # a single quote in the string...
104 end
= self
.current
.copy()
105 self
.tokens
.append(Token('string', start
, end
))
107 end
= self
.current
.copy()
109 self
.tokens
.append(Token('string', start
, end
))
113 def dollar_string(self
):
114 """Dollar-quoted strings"""
116 start
= self
.current
.copy()
117 start
.backward_char()
121 while (c
not in NOT_DOLLAR_QUOTED
):
122 string_tag
= string_tag
+ c
125 end
= self
.current
.copy()
126 self
.tokens
.append(Token('identifier', start
, end
, string_tag
.upper()))
128 end
= self
.current
.copy()
131 self
.tokens
.append(Token('identifier', start
, end
, string_tag
.upper()))
133 self
.tokens
.append(Token('dollarquote', start
, end
, string_tag
.upper()))
136 start
= self
.current
.copy()
140 end
= self
.current
.copy()
141 self
.tokens
.append(Token('identifier', start
, end
, string_tag
.upper()))
146 string_end
= self
.current
.copy()
149 while (c
not in NOT_DOLLAR_QUOTED
):
152 if (s
== string_tag
):
153 string_end
.backward_char()
154 self
.tokens
.append(Token('string', start
, string_end
))
155 end
= self
.current
.copy()
157 self
.tokens
.append(Token('dollarquote', start
, end
, s
.upper()))
162 end
= self
.current
.copy()
163 self
.tokens
.append(Token('string', start
, end
))
165 end
= self
.current
.copy()
167 self
.tokens
.append(Token('string', start
, end
))
171 def bit_string_constant(self
, start
):
172 """Binary and Hexadecimal numeric constants using strings"""
176 start
= self
.current
.copy()
177 start
.backward_char()
178 start
.backward_char()
179 start
.backward_char()
182 end
= self
.current
.copy()
183 self
.tokens
.append(Token('numeric_constant', start
, end
))
184 return self
.next_char()
186 return self
.identifier(c
, start
)
189 def identifier(self
, c
, ident
= ''):
190 """An identifier, keyword, type name, etc..."""
191 start
= self
.current
.copy()
192 for i
in range(0, len(ident
) + 1):
193 start
.backward_char()
195 while (c
not in NOT_IDENT_CHAR
):
199 end
= self
.current
.copy()
200 self
.tokens
.append(Token('identifier', start
, end
, ident
.upper()))
202 end
= self
.current
.copy()
204 self
.tokens
.append(Token('identifier', start
, end
, ident
.upper()))
208 def numeric(self
, c
):
209 """A numeric constant"""
210 start
= self
.current
.copy()
211 start
.backward_char()
213 while (c
in NUMERIC
):
216 end
= self
.current
.copy()
217 self
.tokens
.append(Token('numeric_constant', start
, end
))
219 end
= self
.current
.copy()
221 self
.tokens
.append(Token('numeric_constant', start
, end
))
225 def simple_comment(self
):
226 """One line comment using --"""
227 start
= self
.current
.copy()
228 start
.backward_char()
229 start
.backward_char()
235 end
= self
.current
.copy()
236 self
.tokens
.append(Token('comment', start
, end
))
238 end
= self
.current
.copy()
239 self
.tokens
.append(Token('comment', start
, end
))
243 """Multi lines comments using /* */"""
244 start
= self
.current
.copy()
245 start
.backward_char()
246 start
.backward_char()
269 end
= self
.current
.copy()
270 self
.tokens
.append(Token('comment', start
, end
))
272 end
= self
.current
.copy()
274 self
.tokens
.append(Token('comment', start
, end
))
279 """A PgSQL Command"""
280 start
= self
.current
.copy()
281 start
.backward_char()
285 while (c
!= '\n') and (c
!= ';'):
289 end
= self
.current
.copy()
290 self
.tokens
.append(Token('psql', start
, end
, cmd
))
292 end
= self
.current
.copy()
293 self
.tokens
.append(Token('psql', start
, end
, cmd
))
296 def lexical_analyser(self
, fin
):
297 """A simplified lexical analyser"""
299 while (self
.current
.compare(fin
) <= 0):
300 c
= self
.skip_spaces(c
)
301 # Multi lines comments
308 self
.current
.backward_char()
313 self
.simple_comment()
315 self
.current
.backward_char()
324 elif (c
== 'B') or (c
== 'b') or (c
== 'H') or (c
== 'h'):
325 c
= self
.bit_string_constant(c
)
331 # dollar-quoted strings
333 c
= self
.dollar_string()
339 self
.current
.backward_char()
340 c
= self
.numeric(self
.current
.get_char())
348 elif (c
in OPERATORS
):
349 start
= self
.current
.copy()
350 start
.backward_char()
351 end
= self
.current
.copy()
352 self
.tokens
.append(Token('operator', start
, end
, c
))
354 elif (c
not in NOT_IDENT_START
):
355 c
= self
.identifier(c
)