del net-oscar
[learning-git.git] / pgworksheet_yvesf / pgw / Lexical.py
blobea1c03890aabc39f1b18236defa089518600f9c9
1 #!/usr/bin/env python
2 # -*- coding: latin-1; -*-
4 # PgWorksheet - PostgreSQL Front End
5 # http://pgworksheet.projects.postgresql.org/
7 # Copyright © 2004-2005 Henri Michelon & CML http://www.e-cml.org/
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License
11 # as published by the Free Software Foundation; either version 2
12 # of the License, or (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details (read LICENSE.txt).
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 # $Id: Lexical.py,v 1.6 2005/10/25 17:31:24 hmichelon Exp $
26 # http://www.postgresql.org/docs/8.0/static/sql-syntax.html
# Basic character sets.  They are kept as lists (not strings or sets) so
# they can be concatenated with "+" and extended by other code.
SPACES = list(' \t\n')
DIGITS = list('0123456789')
# Characters that may appear inside a numeric constant.
NUMERIC = DIGITS + list('e.+-')
OPERATOR_CHARS = list('+-*/<>=~!@#%^&|`?')
SPECIAL_CHARS = list('()[],;:*.')
OPERATORS = OPERATOR_CHARS + SPECIAL_CHARS

# Characters that cannot start an identifier.
NOT_IDENT_START = SPECIAL_CHARS + OPERATOR_CHARS + DIGITS + ['$']
# Characters that terminate an identifier.
NOT_IDENT_CHAR = SPECIAL_CHARS + OPERATOR_CHARS + SPACES + ["'"]
# Characters that terminate the tag of a dollar-quoted string.
NOT_DOLLAR_QUOTED = ['$'] + SPACES
class Token:
    """One lexical token found in the analysed text.

    Attributes:
      token      -- the token kind, a string such as 'string' or 'identifier'
      start_iter -- position where the token starts
      end_iter   -- position where the token ends
      value      -- optional payload attached by the scanner, None by default
    """

    def __init__(self, token, start_iter, end_iter, value=None):
        self.token, self.value = token, value
        self.start_iter, self.end_iter = start_iter, end_iter
class Eob(Exception):
    """End of Buffer exception.

    Raised when a character is requested past the end of the text being
    analysed.  It derives from Exception so that it can be raised and
    caught on every Python version; a class that does not inherit from
    BaseException cannot be raised on Python 3.
    """
class Lexical:
    """Simplified lexical analyser for SQL text.

    The text is read through position objects that must provide copy(),
    is_end(), get_char(), forward_char(), backward_char() and compare()
    (presumably gtk.TextIter -- confirm against the caller).

    Scanner conventions used by every method below:
      * self.current always points at the next unread character;
      * a scanner is entered with the character(s) that triggered it
        already consumed;
      * scanners that return a value return the first character that is
        NOT part of their token; that character is already consumed;
      * when the buffer ends in the middle of a token, the partial token
        is recorded up to the end of the buffer and Eob is re-raised.
    """

    def analyse(self, buffer, start, end):
        """Run the lexical analyser and return the list of tokens found.

        buffer -- the text buffer; only stored in self.buffer
        start  -- position where the analysis starts
        end    -- position where the analysis stops

        Neither start nor end is modified; copies are used.
        """
        self.buffer = buffer
        self.current = start.copy()
        self.tokens = []
        try:
            self.lexical_analyser(end.copy())
        except Eob:
            # reaching the end of the buffer is the normal way to finish
            pass
        return self.tokens

    def _position(self, back=0):
        """Returns a copy of the current position moved `back` chars back"""
        position = self.current.copy()
        for _ in range(back):
            position.backward_char()
        return position

    def _peek(self):
        """Returns the next character without consuming it, or None at
        the end of the buffer"""
        if self.current.is_end():
            return None
        return self.current.get_char()

    def next_char(self):
        """Returns the next character to analyse and moves past it.

        Raises Eob when there is no character left.
        """
        if self.current.is_end():
            raise Eob()
        c = self.current.get_char()
        self.current.forward_char()
        return c

    def skip_spaces(self, c):
        """Skips everything that looks like a space/tab/etc...

        Returns c itself if it is not a space, otherwise the first
        following character that is not one.  Raises Eob if the buffer
        ends first.
        """
        while c in SPACES:
            c = self.next_char()
        return c

    def string(self):
        """Single quoted strings.

        Entered just after the opening quote.  Records a 'string' token
        covering both quotes and returns the character following the
        closing quote.
        """
        start = self._position(1)
        prev = None
        c = self.next_char()
        try:
            while True:
                # a quote preceded by a backslash is part of the string
                if c == "'" and prev != '\\':
                    c = self.next_char()
                    if c != "'":
                        break
                    # two quotes in a row: an escaped quote, keep going
                prev = c
                c = self.next_char()
        except Eob:
            self.tokens.append(Token('string', start, self._position()))
            raise
        self.tokens.append(Token('string', start, self._position(1)))
        return c

    def dollar_string(self):
        """Dollar-quoted strings.

        Entered just after the first '$'.  If the characters that follow
        do not form an opening delimiter ("$tag$"), an 'identifier' token
        is recorded and the terminating character is returned.  Otherwise
        a 'dollarquote' token is recorded for the opening delimiter, a
        'string' token for the content and a 'dollarquote' token for the
        closing delimiter, and the character following the closing
        delimiter is returned.  The delimiter tokens cover "$tag", without
        the final '$'.
        """
        # opening delimiter
        start = self._position(1)
        c = self.next_char()
        string_tag = ''
        try:
            while c not in NOT_DOLLAR_QUOTED:
                string_tag = string_tag + c
                c = self.next_char()
        except Eob:
            self.tokens.append(Token('identifier', start, self._position(),
                                     string_tag.upper()))
            raise
        end = self._position(1)
        if c != '$':
            self.tokens.append(Token('identifier', start, end,
                                     string_tag.upper()))
            return c
        self.tokens.append(Token('dollarquote', start, end,
                                 string_tag.upper()))

        # string content
        start = self._position()
        try:
            c = self.next_char()
        except Eob:
            # buffer ends right after the opening delimiter: a zero-length
            # token is recorded at the end of the buffer
            self.tokens.append(Token('identifier', start, self._position(),
                                     string_tag.upper()))
            raise
        try:
            while True:
                if c != '$':
                    c = self.next_char()
                    continue
                # possible closing delimiter: remember where its '$' is
                closing = self._position(1)
                c = self.next_char()
                candidate = ''
                while c not in NOT_DOLLAR_QUOTED:
                    candidate = candidate + c
                    c = self.next_char()
                # the tag must match exactly (tags are case sensitive) and
                # must be terminated by '$', not by a space
                if candidate == string_tag and c == '$':
                    break
                # not the delimiter: c is examined again by the loop, it
                # may itself start the real closing delimiter
        except Eob:
            self.tokens.append(Token('string', start, self._position()))
            raise
        self.tokens.append(Token('string', start, closing))
        self.tokens.append(Token('dollarquote', closing.copy(),
                                 self._position(1), candidate.upper()))
        # the final '$' belongs to the delimiter: hand back what follows it
        return self.next_char()

    def bit_string_constant(self, start):
        """Binary and Hexadecimal numeric constants using strings.

        start -- the prefix letter that was just consumed

        If the prefix is followed by a single quote, a 'numeric_constant'
        token covering the prefix and the quoted part is recorded and the
        character following the closing quote is returned.  Otherwise the
        prefix is the first letter of an identifier and the work is
        delegated to identifier().
        """
        try:
            c = self.next_char()
        except Eob:
            # the prefix letter is the last character of the buffer
            self.tokens.append(Token('identifier', self._position(1),
                                     self._position(), start.upper()))
            raise
        if c != "'":
            return self.identifier(c, start)
        c = self.next_char()
        # step back over the first content character, the quote and the
        # prefix letter
        begin = self._position(3)
        try:
            while c != "'":
                c = self.next_char()
        except Eob:
            self.tokens.append(Token('numeric_constant', begin,
                                     self._position()))
            raise
        self.tokens.append(Token('numeric_constant', begin, self._position()))
        return self.next_char()

    def identifier(self, c, ident=''):
        """An identifier, keyword, type name, etc...

        c     -- the character just consumed
        ident -- characters of the identifier consumed before c, if any

        Records an 'identifier' token whose value is the upper-cased text
        and returns the character that ended the identifier.
        """
        # step back over c and over the characters already in ident
        start = self._position(len(ident) + 1)
        try:
            while c not in NOT_IDENT_CHAR:
                ident = ident + c
                c = self.next_char()
        except Eob:
            self.tokens.append(Token('identifier', start, self._position(),
                                     ident.upper()))
            raise
        self.tokens.append(Token('identifier', start, self._position(1),
                                 ident.upper()))
        return c

    def numeric(self, c):
        """A numeric constant.

        c -- the character just consumed, the first one of the constant

        Records a 'numeric_constant' token and returns the character that
        ended the constant.  A sign is part of the constant only right
        after the exponent letter, so that "1-1" or "1--comment" are not
        taken as a single constant.
        """
        start = self._position(1)
        prev = None
        try:
            while c in NUMERIC or c == 'E':
                if c in ('+', '-') and prev not in ('e', 'E'):
                    break
                prev = c
                c = self.next_char()
        except Eob:
            self.tokens.append(Token('numeric_constant', start,
                                     self._position()))
            raise
        self.tokens.append(Token('numeric_constant', start,
                                 self._position(1)))
        return c

    def simple_comment(self):
        """One line comment using --

        Entered just after the second '-'.  Records a 'comment' token
        that ends after the end-of-line character.  Returns nothing: the
        last character consumed is the end of line.
        """
        start = self._position(2)
        c = self.next_char()
        try:
            while c != '\n':
                c = self.next_char()
        except Eob:
            self.tokens.append(Token('comment', start, self._position()))
            raise
        self.tokens.append(Token('comment', start, self._position()))

    def comment(self):
        """Multi lines comments using /* */

        Entered just after the opening "/*".  Nested comments are
        supported.  Records a 'comment' token and returns the character
        following the closing "*/".
        """
        start = self._position(2)
        c = self.next_char()
        prev = None
        nested = 0
        try:
            while True:
                if c == '*':
                    c = self.next_char()
                    if prev == '/':
                        # "/*" inside the comment opens a nested level
                        nested = nested + 1
                        # forget the '/' so that it is not counted twice
                        prev = None
                        continue
                    if c != '/':
                        # a lone '*': c is examined again by the loop
                        prev = c
                        continue
                    if nested == 0:
                        c = self.next_char()
                        break
                    # "*/" closes a nested level; its '/' must not be
                    # taken as the start of a new "/*"
                    nested = nested - 1
                    prev = None
                    c = self.next_char()
                    continue
                prev = c
                c = self.next_char()
        except Eob:
            self.tokens.append(Token('comment', start, self._position()))
            raise
        self.tokens.append(Token('comment', start, self._position(1)))
        return c

    def psql(self):
        """A PgSQL Command

        Entered just after the backslash.  Records a 'psql' token whose
        value is the command text, backslash included, and which ends
        after the terminating end of line or ';'.  Returns nothing.
        """
        start = self._position(1)
        c = self.next_char()
        cmd = '\\'
        try:
            while c != '\n' and c != ';':
                cmd = cmd + c
                c = self.next_char()
        except Eob:
            self.tokens.append(Token('psql', start, self._position(), cmd))
            raise
        self.tokens.append(Token('psql', start, self._position(), cmd))

    def lexical_analyser(self, fin):
        """A simplified lexical analyser.

        fin -- position where the analysis stops

        Dispatches on the first character of each token and lets the
        scanner methods append to self.tokens.  Eob is not caught here:
        it is how the analysis ends when the end of the buffer is reached.
        """
        bit_string_prefixes = ('B', 'b', 'H', 'h', 'X', 'x')
        c = self.next_char()
        while self.current.compare(fin) <= 0:
            c = self.skip_spaces(c)
            # Multi lines comments
            if c == '/' and self._peek() == '*':
                self.next_char()
                c = self.comment()
                continue
            # One line comments
            elif c == '-' and self._peek() == '-':
                self.next_char()
                self.simple_comment()
            # psql commands
            elif c == '\\':
                self.psql()
            # numeric
            elif c in DIGITS:
                c = self.numeric(c)
                continue
            # bit strings
            elif c in bit_string_prefixes:
                c = self.bit_string_constant(c)
                continue
            # strings
            elif c == "'":
                c = self.string()
                continue
            # dollar-quoted strings
            elif c == '$':
                c = self.dollar_string()
                continue
            # numeric starting with the decimal point
            elif c == '.' and self._peek() in DIGITS:
                # the digit is not consumed: numeric() then places the
                # start of the token on the '.'
                c = self.numeric(self.current.get_char())
                continue
            # quoted identifiers (skipped, no token is recorded)
            elif c == '"':
                c = self.next_char()
                while c != '"':
                    c = self.next_char()
            # operators, including a lone '/', '-' or '.'
            elif c in OPERATORS:
                self.tokens.append(Token('operator', self._position(1),
                                         self._position(), c))
            # everything else
            elif c not in NOT_IDENT_START:
                c = self.identifier(c)
                continue
            c = self.next_char()