del net-oscar
[learning-git.git] / pgworksheet_1.9 / pgw / Lexical.py
blob9530d0861368f1f51c7c0e70cf287ff8c634ba4c
#!/usr/bin/env python
# -*- coding: latin-1; -*-
# PgWorksheet - PostgreSQL Front End
# http://pgworksheet.projects.postgresql.org/
# Copyright © 2004-2008 Henri Michelon & CML http://www.e-cml.org/
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details (read LICENSE.txt).
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# $Id: Lexical.py,v 1.9 2008/03/12 20:26:23 hmichelon Exp $
# http://www.postgresql.org/docs/8.0/static/sql-syntax.html
# Basic character sets.  They are kept as lists (not strings or sets) because
# the derived sets below are built by list concatenation.
SPACES = [' ', '\t', '\n']
DIGITS = list('0123456789')
NUMERIC = DIGITS + ['e', '.', '+', '-']
OPERATOR_CHARS = list('+-*/<>=~!@#%^&|`?')
SPECIAL_CHARS = list('()[],;:*.')
OPERATORS = OPERATOR_CHARS + SPECIAL_CHARS

# Characters that cannot be the first character of an identifier.
NOT_IDENT_START = SPECIAL_CHARS + OPERATOR_CHARS + DIGITS + ['$']
# Characters that cannot appear inside an identifier (they terminate it).
NOT_IDENT_CHAR = SPECIAL_CHARS + OPERATOR_CHARS + SPACES + ["'"]
# Characters that cannot appear inside the tag of a dollar-quoted string.
NOT_DOLLAR_QUOTED = ['$'] + SPACES
class Token:
    """One lexical token together with the buffer range it covers.

    Attributes (all copied verbatim from the constructor arguments):
      token      -- the kind of token
      start_iter -- iterator marking where the token starts
      end_iter   -- iterator marking where the token ends
      value      -- optional payload attached to the token, None by default
    """

    def __init__(self, token, start_iter, end_iter, value=None):
        """Store the token kind, its two bounds and its optional value."""
        self.token, self.value = token, value
        self.start_iter, self.end_iter = start_iter, end_iter
class Eob(Exception):
    """End of Buffer Exception.

    It is raised and caught as an exception, so it must derive from
    Exception: Python 3 refuses to raise or catch a class that does not
    inherit from BaseException.
    """
class Lexical:
    """Simplified lexical analyser for SQL text held in a text buffer.

    The analyser walks the buffer one character at a time through an
    iterator (self.current) and appends Token objects to self.tokens.
    Token kinds produced: 'string', 'dollarquote', 'identifier',
    'numeric_constant', 'comment', 'psql' and 'operator'.

    The iterators only need to provide copy(), is_end(), get_char(),
    forward_char(), backward_char() and compare() (presumably gtk.TextIter
    objects -- confirm against the callers).

    Token ranges are half-open: end_iter points just after the last
    character of the token.  When the buffer ends in the middle of a
    construct, the partial token is still appended before Eob propagates.
    """

    def analyse(self, buffer, start, end):
        """Run the lexical analyser between start and end.

        buffer -- the text buffer; it is only stored on the instance
        start  -- iterator where the analysis begins (copied, not modified)
        end    -- iterator where the analysis stops (copied, not modified)

        Returns the list of Token objects found.  Reaching the end of the
        buffer is the normal way for the analysis to finish, so Eob is
        swallowed here.
        """
        self.buffer = buffer
        self.current = start.copy()
        self.tokens = []
        try:
            self.lexical_analyser(end.copy())
        except Eob:
            pass
        return self.tokens

    def next_char(self):
        """Return the character under the cursor and advance the cursor.

        Raises Eob when the cursor is already at the end of the buffer.
        """
        if self.current.is_end():
            raise Eob()
        c = self.current.get_char()
        self.current.forward_char()
        return c

    def _peek(self):
        """Return the next character without consuming it (None at the end)."""
        if self.current.is_end():
            return None
        return self.current.get_char()

    def _iter_at(self, back=0):
        """Return a copy of the cursor moved `back` characters backwards."""
        it = self.current.copy()
        for _ in range(back):
            it.backward_char()
        return it

    def _add(self, kind, start, end, value=None):
        """Append a token of the given kind covering [start, end)."""
        self.tokens.append(Token(kind, start, end, value))

    def skip_spaces(self, c):
        """Skip everything that looks like a space/tab/newline.

        c is the current character; the first non-space character is
        returned.  Raises Eob if the buffer ends first.
        """
        while c in SPACES:
            c = self.next_char()
        return c

    def string(self):
        """Single quoted string; the opening quote has just been consumed.

        A doubled quote ('') and a backslash-escaped character stay inside
        the string.  Returns the first character after the closing quote.
        """
        start = self._iter_at(1)
        # Track the escape state explicitly: looking only at the previous
        # character would treat the quote in '\\' as escaped.
        escaped = False
        try:
            while True:
                c = self.next_char()
                if escaped:
                    escaped = False
                elif c == '\\':
                    escaped = True
                elif c == "'":
                    c = self.next_char()
                    if c != "'":
                        break
                    # doubled quote: a literal quote, keep scanning
        except Eob:
            self._add('string', start, self._iter_at())
            raise
        # c (the character after the closing quote) is already consumed,
        # so the token ends one character before the cursor.
        self._add('string', start, self._iter_at(1))
        return c

    def dollar_string(self):
        """Dollar-quoted string; the leading '$' has just been consumed.

        Emits a 'dollarquote' token for the opening $tag$, a 'string' token
        for the content and a 'dollarquote' token for the closing $tag$.
        If the tag is not terminated by '$' (for example "$1"), a single
        'identifier' token is emitted instead.  Tag values are upper-cased.
        Returns the first character after the construct.
        """
        # opening delimiter
        start = self._iter_at(1)
        tag = ''
        try:
            c = self.next_char()
            while c not in NOT_DOLLAR_QUOTED:
                tag = tag + c
                c = self.next_char()
        except Eob:
            self._add('identifier', start, self._iter_at(), tag.upper())
            raise
        if c != '$':
            self._add('identifier', start, self._iter_at(1), tag.upper())
            return c
        # The delimiter includes its final '$', which is the character
        # just consumed, so the token ends at the cursor.
        self._add('dollarquote', start, self._iter_at(), tag.upper())

        # string content
        content_start = self._iter_at()
        try:
            c = self.next_char()
            while True:
                if c != '$':
                    c = self.next_char()
                    continue
                # candidate closing delimiter starting at this '$'
                close_start = self._iter_at(1)
                candidate = ''
                c = self.next_char()
                while c not in NOT_DOLLAR_QUOTED:
                    candidate = candidate + c
                    c = self.next_char()
                # A closing delimiter needs the same tag AND a final '$'.
                if c == '$' and candidate == tag:
                    self._add('string', content_start, close_start)
                    self._add('dollarquote', close_start, self._iter_at(),
                              candidate.upper())
                    break
                # Not a delimiter: c is examined again by the next pass
                # (it may be the '$' that opens the real delimiter).
        except Eob:
            self._add('string', content_start, self._iter_at())
            raise
        # Hand back the character AFTER the closing '$'; returning the '$'
        # itself would make the caller start a new dollar-quoted string.
        return self.next_char()

    def bit_string_constant(self, start):
        """Binary and hexadecimal numeric constants written as strings.

        start is the prefix letter, which has just been consumed.  If it is
        followed by a quote, everything up to the closing quote becomes one
        'numeric_constant' token; otherwise the letter is simply the first
        character of an identifier.  Returns the next character to analyse.
        """
        prefix = start
        begin = self._iter_at(1)
        try:
            c = self.next_char()
        except Eob:
            # A lone letter at the very end of the buffer is an identifier.
            self._add('identifier', begin, self._iter_at(), prefix.upper())
            raise
        if c != "'":
            return self.identifier(c, prefix)
        try:
            c = self.next_char()
            while c != "'":
                c = self.next_char()
        except Eob:
            self._add('numeric_constant', begin, self._iter_at())
            raise
        self._add('numeric_constant', begin, self._iter_at())
        return self.next_char()

    def identifier(self, c, ident=''):
        """An identifier, keyword, type name, etc...

        c     -- the current character (already consumed)
        ident -- characters of the identifier consumed before c, if any

        The token value is the upper-cased identifier.  Returns the
        character that terminated the identifier.
        """
        # The identifier starts before c and before the characters
        # already collected in ident.
        start = self._iter_at(len(ident) + 1)
        try:
            while c not in NOT_IDENT_CHAR:
                ident = ident + c
                c = self.next_char()
        except Eob:
            self._add('identifier', start, self._iter_at(), ident.upper())
            raise
        self._add('identifier', start, self._iter_at(1), ident.upper())
        return c

    def numeric(self, c):
        """A numeric constant: digits, one decimal point, one exponent.

        c is the first character of the constant (a digit or a '.'), already
        consumed.  A sign is accepted only immediately after the exponent
        letter, so "1+2" and "1--x" are no longer swallowed as one constant.
        The exponent letter is accepted without checking that digits follow.
        Returns the character that terminated the constant.
        """
        start = self._iter_at(1)
        seen_dot = False
        seen_exp = False
        prev = None
        try:
            while True:
                if c in DIGITS:
                    pass
                elif c == '.' and not (seen_dot or seen_exp):
                    seen_dot = True
                elif c in ('e', 'E') and not seen_exp:
                    seen_exp = True
                elif c in ('+', '-') and prev in ('e', 'E'):
                    pass
                else:
                    break
                prev = c
                c = self.next_char()
        except Eob:
            self._add('numeric_constant', start, self._iter_at())
            raise
        self._add('numeric_constant', start, self._iter_at(1))
        return c

    def simple_comment(self):
        """One line comment using --; both dashes have just been consumed.

        The token runs up to and including the end of line.  Returns None.
        """
        start = self._iter_at(2)
        try:
            c = self.next_char()
            while c != '\n':
                c = self.next_char()
        except Eob:
            self._add('comment', start, self._iter_at())
            raise
        self._add('comment', start, self._iter_at())

    def comment(self):
        """Multi lines comment using /* */; the opener has been consumed.

        Nested comments are supported.  Returns the first character after
        the closing */.
        """
        start = self._iter_at(2)
        depth = 1
        prev = None
        try:
            while depth > 0:
                c = self.next_char()
                if prev == '/' and c == '*':
                    depth += 1
                    # Forget the pair so that "/**" is counted only once
                    # and the '*' cannot also begin a closing "*/".
                    prev = None
                elif prev == '*' and c == '/':
                    depth -= 1
                    prev = None
                else:
                    prev = c
        except Eob:
            self._add('comment', start, self._iter_at())
            raise
        self._add('comment', start, self._iter_at())
        return self.next_char()

    def psql(self):
        """A psql command; the leading backslash has just been consumed.

        The command runs to the end of line or to the next ';' (which is
        consumed and included in the token range).  The token value is the
        command text starting with the backslash.  Returns None.
        """
        start = self._iter_at(1)
        cmd = '\\'
        try:
            c = self.next_char()
            while (c != '\n') and (c != ';'):
                cmd = cmd + c
                c = self.next_char()
        except Eob:
            self._add('psql', start, self._iter_at(), cmd)
            raise
        self._add('psql', start, self._iter_at(), cmd)

    def lexical_analyser(self, fin):
        """Main loop: dispatch on the current character until fin is passed.

        fin is the iterator where the analysis stops.  The loop normally
        ends by Eob being raised from next_char().
        """
        c = self.next_char()
        while self.current.compare(fin) <= 0:
            c = self.skip_spaces(c)
            # Two-character openers are detected by peeking, so a lone
            # '/', '-' or '.' still reaches the operator branch below.
            if c == '/' and self._peek() == '*':
                # Multi lines comments
                self.next_char()
                c = self.comment()
                continue
            elif c == '-' and self._peek() == '-':
                # One line comments
                self.next_char()
                self.simple_comment()
            elif c == '\\':
                # psql commands
                self.psql()
            elif (c in DIGITS) or (c == '.' and self._peek() in DIGITS):
                # numeric, including the ".5" form
                c = self.numeric(c)
                continue
            elif c in ('B', 'b', 'X', 'x', 'H', 'h'):
                # bit strings: SQL uses B'...' and X'...'; H is kept only
                # because the previous version accepted it.
                c = self.bit_string_constant(c)
                continue
            elif c == "'":
                # strings
                c = self.string()
                continue
            elif c == '$':
                # dollar-quoted strings
                c = self.dollar_string()
                continue
            elif c == '"':
                # quoted identifiers are skipped without emitting a token
                c = self.next_char()
                while c != '"':
                    c = self.next_char()
            elif c in OPERATORS:
                # operators: a single character ending at the cursor
                self._add('operator', self._iter_at(1), self._iter_at(), c)
            elif c not in NOT_IDENT_START:
                # everything else
                c = self.identifier(c)
                continue
            c = self.next_char()