# -*- coding: latin-1; -*-
#
# PgWorksheet - PostgreSQL Front End
# http://pgworksheet.projects.postgresql.org/
#
# Copyright © 2004-2005 Henri Michelon & CML http://www.e-cml.org/
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details (read LICENSE.txt).
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# $Id: Lexical.py,v 1.6 2005/10/25 17:31:24 hmichelon Exp $
#
# http://www.postgresql.org/docs/8.0/static/sql-syntax.html
# Basic character sets used by the lexer.  They are kept as plain lists of
# one-character strings so that they can be combined with `+` below and
# tested with `in` by the analyser methods.
SPACES = list(' \t\n')
DIGITS = list('0123456789')
# Characters accepted while scanning a numeric constant: digits plus the
# exponent marker, the decimal point and the signs.
NUMERIC = DIGITS + list('e.+-')
OPERATOR_CHARS = list('+-*/<>=~!@#%^&|`?')
SPECIAL_CHARS = list('()[],;:*.')
# Every character that is emitted as an 'operator' token.  Note that '*'
# belongs to both source lists and therefore appears twice here; this is
# harmless for membership tests.
OPERATORS = OPERATOR_CHARS + SPECIAL_CHARS

# Characters that cannot be the first character of an identifier.
NOT_IDENT_START = SPECIAL_CHARS + OPERATOR_CHARS + DIGITS + ['$']
# Characters that terminate an identifier (cannot appear inside one).
NOT_IDENT_CHAR = SPECIAL_CHARS + OPERATOR_CHARS + SPACES + ["'"]
# Characters that terminate the tag of a dollar-quoted string.
NOT_DOLLAR_QUOTED = ['$'] + SPACES
47 def __init__(self
, token
, start_iter
, end_iter
, value
=None):
49 self
.start_iter
= start_iter
50 self
.end_iter
= end_iter
55 """End of Buffer Exception"""
59 """Simplified lexical analyser"""
61 def analyse(self
, buffer, start
, end
):
62 """Run the lexical and syntaxical analysers then
63 apply the syntax highlight to the buffer"""
65 self
.current
= start
.copy()
68 self
.lexical_analyser(end
.copy())
75 """Returns the next character to analyse"""
76 if (self
.current
.is_end()):
78 c
= self
.current
.get_char()
79 self
.current
.forward_char()
83 def skip_spaces(self
, c
):
84 """Skips everything that looks like a space/tab/etc..."""
91 """Single quoted strings"""
92 start
= self
.current
.copy()
98 if ((c
== "'") and (prev
!= '\\')): # a single quote in the string...
105 end
= self
.current
.copy()
106 self
.tokens
.append(Token('string', start
, end
))
108 end
= self
.current
.copy()
110 self
.tokens
.append(Token('string', start
, end
))
114 def dollar_string(self
):
115 """Dollar-quoted strings"""
117 start
= self
.current
.copy()
118 start
.backward_char()
122 while (c
not in NOT_DOLLAR_QUOTED
):
123 string_tag
= string_tag
+ c
126 end
= self
.current
.copy()
127 self
.tokens
.append(Token('identifier', start
, end
, string_tag
.upper()))
129 end
= self
.current
.copy()
132 self
.tokens
.append(Token('identifier', start
, end
, string_tag
.upper()))
134 self
.tokens
.append(Token('dollarquote', start
, end
, string_tag
.upper()))
137 start
= self
.current
.copy()
141 end
= self
.current
.copy()
142 self
.tokens
.append(Token('identifier', start
, end
, string_tag
.upper()))
147 string_end
= self
.current
.copy()
150 while (c
not in NOT_DOLLAR_QUOTED
):
153 if (s
== string_tag
):
154 string_end
.backward_char()
155 self
.tokens
.append(Token('string', start
, string_end
))
156 end
= self
.current
.copy()
158 self
.tokens
.append(Token('dollarquote', start
, end
, s
.upper()))
163 end
= self
.current
.copy()
164 self
.tokens
.append(Token('string', start
, end
))
166 end
= self
.current
.copy()
168 self
.tokens
.append(Token('string', start
, end
))
171 def bit_string_constant(self
, start
):
172 """Binary and Hexadecimal numeric constants using strings"""
176 start
= self
.current
.copy()
177 start
.backward_char()
178 start
.backward_char()
179 start
.backward_char()
182 end
= self
.current
.copy()
183 self
.tokens
.append(Token('numeric_constant', start
, end
))
184 return self
.next_char()
186 return self
.identifier(c
, start
)
189 def identifier(self
, c
, ident
= ''):
190 """An identifier, keyword, type name, etc..."""
191 start
= self
.current
.copy()
192 for i
in range(0, len(ident
) + 1):
193 start
.backward_char()
195 while (c
not in NOT_IDENT_CHAR
):
199 end
= self
.current
.copy()
200 self
.tokens
.append(Token('identifier', start
, end
, ident
.upper()))
202 end
= self
.current
.copy()
204 self
.tokens
.append(Token('identifier', start
, end
, ident
.upper()))
208 def numeric(self
, c
):
209 """A numeric constant"""
210 start
= self
.current
.copy()
211 start
.backward_char()
213 while (c
in NUMERIC
):
216 end
= self
.current
.copy()
217 self
.tokens
.append(Token('numeric_constant', start
, end
))
219 end
= self
.current
.copy()
221 self
.tokens
.append(Token('numeric_constant', start
, end
))
225 def simple_comment(self
):
226 """One line comment using --"""
227 start
= self
.current
.copy()
228 start
.backward_char()
229 start
.backward_char()
235 end
= self
.current
.copy()
236 self
.tokens
.append(Token('comment', start
, end
))
238 end
= self
.current
.copy()
239 self
.tokens
.append(Token('comment', start
, end
))
243 """Multi lines comments using /* */"""
244 start
= self
.current
.copy()
245 start
.backward_char()
246 start
.backward_char()
269 end
= self
.current
.copy()
270 self
.tokens
.append(Token('comment', start
, end
))
272 end
= self
.current
.copy()
274 self
.tokens
.append(Token('comment', start
, end
))
279 """A PgSQL Command"""
280 start
= self
.current
.copy()
281 start
.backward_char()
285 while (c
!= '\n') and (c
!= ';'):
289 end
= self
.current
.copy()
290 self
.tokens
.append(Token('psql', start
, end
, cmd
))
292 end
= self
.current
.copy()
293 self
.tokens
.append(Token('psql', start
, end
, cmd
))
296 def lexical_analyser(self
, fin
):
297 """A simplified lexical analyser"""
299 while (self
.current
.compare(fin
) <= 0):
300 c
= self
.skip_spaces(c
)
301 # Multi lines comments
308 self
.current
.backward_char()
313 self
.simple_comment()
315 self
.current
.backward_char()
324 elif (c
== 'B') or (c
== 'b') or (c
== 'H') or (c
== 'h'):
325 c
= self
.bit_string_constant(c
)
331 # dollar-quoted strings
333 c
= self
.dollar_string()
339 self
.current
.backward_char()
340 c
= self
.numeric(self
.current
.get_char())
348 elif (c
in OPERATORS
):
349 start
= self
.current
.copy()
350 start
.backward_char()
351 end
= self
.current
.copy()
352 self
.tokens
.append(Token('operator', start
, end
, c
))
354 elif (c
not in NOT_IDENT_START
):
355 c
= self
.identifier(c
)