Fix obsolete comment regarding FSM truncation.
[PostgreSQL.git] / src / interfaces / ecpg / preproc / parse.pl
blob30cb7e5273e1daad58218ca181943a3044755fa0
1 #!/usr/bin/perl
2 # $PostgreSQL$
3 # parser generater for ecpg
4 # call with backend parser as stdin
6 # Copyright (c) 2007-2008, PostgreSQL Global Development Group
8 # Written by Mike Aubury <mike.aubury@aubit.com>
9 # Michael Meskes <meskes@postgresql.org>
11 # Placed under the same license as PostgreSQL.
14 if (@ARGV) {
15 $path = $ARGV[0];
16 shift @ARGV;
19 if ($path eq '') { $path = "."; }
21 $[ = 1; # set array base to 1
22 $, = ' '; # set output field separator
23 $\ = "\n"; # set output record separator
25 $copymode = 'off';
26 $brace_indent = 0;
27 $yaccmode = 0;
28 $header_included = 0;
29 $feature_not_supported = 0;
30 $tokenmode = 0;
32 # some token have to be replaced by other symbols
33 # either in the rule
34 $replace_token{'BCONST'} = 'ecpg_bconst';
35 $replace_token{'FCONST'} = 'ecpg_fconst';
36 $replace_token{'Sconst'} = 'ecpg_sconst';
37 $replace_token{'IDENT'} = 'ecpg_ident';
38 $replace_token{'PARAM'} = 'ecpg_param';
39 # or in the block
40 $replace_string{'WITH_TIME'} = 'with time';
41 $replace_string{'NULLS_FIRST'} = 'nulls first';
42 $replace_string{'NULLS_LAST'} = 'nulls last';
43 $replace_string{'TYPECAST'} = '::';
45 # specific replace_types for specific non-terminals - never include the ':'
46 # ECPG-only replace_types are defined in ecpg-replace_types
47 $replace_types{'PrepareStmt'} = '<prep>';
48 $replace_types{'opt_array_bounds'} = '<index>';
49 # "ignore" means: do not create type and rules for this non-term-id
50 $replace_types{'stmtblock'} = 'ignore';
51 $replace_types{'stmtmulti'} = 'ignore';
52 $replace_types{'CreateAsStmt'} = 'ignore';
53 $replace_types{'DeallocateStmt'} = 'ignore';
54 $replace_types{'RuleStmt'} = 'ignore';
55 $replace_types{'ColLabel'} = 'ignore';
56 $replace_types{'unreserved_keyword'} = 'ignore';
57 $replace_types{'Sconst'} = 'ignore';
59 # some production rules have to be ignored or replaced
60 $replace_line{'fetch_direction'} = 'ignore';
61 $replace_line{"opt_array_boundsopt_array_bounds'['Iconst']'"} = 'ignore';
62 $replace_line{'col_name_keywordCHAR_P'} = 'ignore';
63 $replace_line{'col_name_keywordINT_P'} = 'ignore';
64 $replace_line{'col_name_keywordVALUES'} = 'ignore';
65 $replace_line{'reserved_keywordTO'} = 'ignore';
66 $replace_line{'reserved_keywordUNION'} = 'ignore';
67 $replace_line{'VariableShowStmtSHOWvar_name'} = 'SHOW var_name ecpg_into';
68 $replace_line{'VariableShowStmtSHOWTIMEZONE'} = 'SHOW TIME ZONE ecpg_into';
69 $replace_line{'VariableShowStmtSHOWTRANSACTIONISOLATIONLEVEL'} = 'SHOW TRANSACTION ISOLATION LEVEL ecpg_into';
70 $replace_line{'VariableShowStmtSHOWSESSIONAUTHORIZATION'} = 'SHOW SESSION AUTHORIZATION ecpg_into';
71 $replace_line{'ExecuteStmtEXECUTEnameexecute_param_clause'} = 'EXECUTE prepared_name execute_param_clause execute_rest';
72 $replace_line{'ExecuteStmtCREATEOptTempTABLEcreate_as_targetASEXECUTEnameexecute_param_clause'} = 'CREATE OptTemp TABLE create_as_target AS EXECUTE prepared_name execute_param_clause';
73 $replace_line{'PrepareStmtPREPAREnameprep_type_clauseASPreparableStmt'} = 'PREPARE prepared_name prep_type_clause AS PreparableStmt';
74 $replace_line{'var_nameColId'} = 'ECPGColId';
76 line: while (<>) {
77 chomp; # strip record separator
78 @Fld = split(' ', $_, -1);
80 # Dump the action for a rule -
81 # mode indicates if we are processing the 'stmt:' rule (mode==0 means normal, mode==1 means stmt:)
82 # flds are the fields to use. These may start with a '$' - in which case they are the result of a previous non-terminal
83 # if they dont start with a '$' then they are token name
85 # len is the number of fields in flds...
86 # leadin is the padding to apply at the beginning (just use for formatting)
88 if (/ERRCODE_FEATURE_NOT_SUPPORTED/) {
89 $feature_not_supported = 1;
90 next line;
93 if (/^%%/) {
94 $tokenmode = 2;
95 $copymode = 'on';
96 $yaccmode++;
97 $infield = 0;
98 $fieldcount = 0;
101 $S = $_;
102 $prec = 0;
103 # Make sure any braces are split
104 $s = '{', $S =~ s/$s/ { /g;
105 $s = '}', $S =~ s/$s/ } /g;
106 # Any comments are split
107 $s = '[/][*]', $S =~ s#$s# /* #g;
108 $s = '[*][/]', $S =~ s#$s# */ #g;
110 # Now split the line into individual fields
111 $n = (@arr = split(' ', $S));
113 if ($arr[1] eq '%token' && $tokenmode == 0) {
114 $tokenmode = 1;
115 &include_stuff('tokens', 'ecpg.tokens', '', 1, 0);
116 $type = 1;
118 elsif ($arr[1] eq '%type' && $header_included == 0) {
119 &include_stuff('header', 'ecpg.header', '', 1, 0);
120 &include_stuff('ecpgtype', 'ecpg.type', '', 1, 0);
121 $header_included = 1;
124 if ($tokenmode == 1) {
125 $str = '';
126 for ($a = 1; $a <= $n; $a++) {
127 if ($arr[$a] eq '/*') {
128 $comment++;
129 next;
131 if ($arr[$a] eq '*/') {
132 $comment--;
133 next;
135 if ($comment) {
136 next;
138 if (substr($arr[$a], 1, 1) eq '<') {
139 next;
140 # its a type
142 $tokens{$arr[$a]} = 1;
144 $str = $str . ' ' . $arr[$a];
145 if ($arr[$a] eq 'IDENT' && $arr[$a - 1] eq '%nonassoc') {
146 # add two more tokens to the list
147 $str = $str . "\n%nonassoc CSTRING\n%nonassoc UIDENT";
150 &add_to_buffer('orig_tokens', $str);
151 next line;
154 # Dont worry about anything if we're not in the right section of gram.y
155 if ($yaccmode != 1) {
156 next line;
159 # Go through each field in turn
160 for ($fieldIndexer = 1; $fieldIndexer <= $n; $fieldIndexer++) {
161 if ($arr[$fieldIndexer] eq '*/' && $comment) {
162 $comment = 0;
163 next;
165 elsif ($comment) {
166 next;
168 elsif ($arr[$fieldIndexer] eq '/*') {
169 # start of a multiline comment
170 $comment = 1;
171 next;
173 elsif ($arr[$fieldIndexer] eq '//') {
174 next line;
176 elsif ($arr[$fieldIndexer] eq '}') {
177 $brace_indent--;
178 next;
180 elsif ($arr[$fieldIndexer] eq '{') {
181 $brace_indent++;
182 next;
185 if ($brace_indent > 0) {
186 next;
188 if ($arr[$fieldIndexer] eq ';') {
189 if ($copymode eq 'on') {
190 if ($infield && $includetype eq '') {
191 &dump_line($stmt_mode, $fields, $field_count);
193 &add_to_buffer('rules', ";\n\n");
195 else {
196 $copymode = 'on';
198 $field_count = 0;
199 $infield = 0;
200 $line = '';
201 $includetype = '';
202 next;
205 if ($arr[$fieldIndexer] eq '|') {
206 if ($copymode eq 'on') {
207 if ($infield && $includetype eq '') {
208 $infield = $infield + &dump_line($stmt_mode, $fields, $field_count);
210 if ($infield > 1) {
211 $line = '| ';
214 $field_count = 0;
215 $includetype = '';
216 next;
219 if ($replace_token{$arr[$fieldIndexer]}) {
220 $arr[$fieldIndexer] = $replace_token{$arr[$fieldIndexer]};
223 # Are we looking at a declaration of a non-terminal ?
224 if (($arr[$fieldIndexer] =~ '[A-Za-z0-9]+:') || $arr[$fieldIndexer + 1] eq ':') {
225 $non_term_id = $arr[$fieldIndexer];
226 $s = ':', $non_term_id =~ s/$s//g;
228 if ($replace_types{$non_term_id} eq '') {
229 $replace_types{$non_term_id} = '<str>';
231 if ($replace_types{$non_term_id} eq 'ignore') {
232 $copymode = ';';
233 $line = '';
234 next line;
236 else {
237 $copymode = 'on';
239 $line = $line . ' ' . $arr[$fieldIndexer];
240 # Do we have the : attached already ?
241 # If yes, we'll have already printed the ':'
242 if (!($arr[$fieldIndexer] =~ '[A-Za-z0-9]+:')) {
243 # Consume the ':' which is next...
244 $line = $line . ':';
245 $fieldIndexer++;
248 # Special mode?
249 if ($non_term_id eq 'stmt') {
250 $stmt_mode = 1;
252 else {
253 $stmt_mode = 0;
255 $tstr = '%type ' . $replace_types{$non_term_id} . ' ' . $non_term_id;
256 &add_to_buffer('types', $tstr);
258 if ($copymode eq 'on') {
259 &add_to_buffer('rules', $line);
261 $line = '';
262 $field_count = 0;
263 $infield = 1;
264 next;
266 elsif ($copymode eq 'on') {
267 $line = $line . ' ' . $arr[$fieldIndexer];
269 if ($arr[$fieldIndexer] eq '%prec') {
270 $prec = 1;
271 next;
274 if ($copymode eq 'on' && !$prec && !$comment && $arr[$fieldIndexer] ne '/*EMPTY*/' && length($arr[$fieldIndexer]) && $infield) {
275 $nfield = $field_count + 1;
276 if ($arr[$fieldIndexer] ne 'Op' && ($tokens{$arr[$fieldIndexer]} > 0 || $arr[$fieldIndexer] =~ "'.+'") || $stmt_mode == 1) {
277 if ($replace_string{$arr[$fieldIndexer]}) {
278 $S = $replace_string{$arr[$fieldIndexer]};
280 else {
281 $S = $arr[$fieldIndexer];
283 $s = '_P', $S =~ s/$s//g;
284 $s = "'", $S =~ s/$s//g;
285 if ($stmt_mode == 1) {
286 $fields{$field_count++} = $S;
288 else {
289 $fields{$field_count++} = lc($S);
292 else {
293 $fields{$field_count++} = "\$" . $nfield;
299 &dump('header');
300 &dump('tokens');
301 &dump('types');
302 &dump('ecpgtype');
303 &dump('orig_tokens');
304 print '%%';
305 print 'prog: statements;';
306 &dump('rules');
307 &include_stuff('trailer', 'ecpg.trailer', '', 1, 0);
308 &dump('trailer');
310 sub include_stuff {
311 local($includestream, $includefilename, $includeblock, $copy, $field_count) = @_;
312 $copied = 0;
313 $inblock = 0;
314 $filename = $path . "/" . $includefilename;
315 while (($_ = &Getline2($filename),$getline_ok)) {
316 if ($includeblock ne '' && $Fld[1] eq 'ECPG:' && $inblock == 0) {
317 if ($Fld[2] eq $includeblock) {
318 $copy = 1;
319 $inblock = 1;
320 $includetype = $Fld[3];
321 if ($includetype eq 'rule') {
322 &dump_fields($stmt_mode, *fields, $field_count, ' { ');
324 elsif ($includetype eq 'addon') {
325 &add_to_buffer('rules', ' { ');
328 else {
329 $copy = 0;
332 else {
333 if ($copy == 1 && $Fld[1] ne 'ECPG:') {
334 &add_to_buffer($includestream, $_);
335 $copied = 1;
336 $inblock = 0;
340 delete $opened{$filename} && close($filename);
341 if ($includetype eq 'addon') {
342 &dump_fields($stmt_mode, *fields, $field_count, '');
344 if ($copied == 1) {
345 $field_count = 0;
346 $line = '';
348 $copied;
351 sub add_to_buffer {
352 local($buffer, $str) = @_;
353 $buff{$buffer, $buffcnt{$buffer}++} = $str;
356 sub dump {
357 local($buffer) = @_;
358 print '/* ' . $buffer . ' */';
359 for ($a = 0; $a < $buffcnt{$buffer}; $a++) {
360 print $buff{$buffer, $a};
364 sub dump_fields {
365 local($mode, *flds, $len, $ln) = @_;
366 if ($mode == 0) {
367 #Normal
368 &add_to_buffer('rules', $ln);
369 if ($feature_not_supported == 1) {
370 # we found an unsupported feature, but we have to
371 # filter out ExecuteStmt: CREATE OptTemp TABLE ...
372 # because the warning there is only valid in some situations
373 if ($flds{0} ne 'create' || $flds{2} ne 'table') {
374 &add_to_buffer('rules', "mmerror(PARSE_ERROR, ET_WARNING, \"unsupported feature will be passed to backend\\n\");");
376 $feature_not_supported = 0;
379 if ($len == 0) {
380 # We have no fields ?
381 &add_to_buffer('rules', " \$\$=EMPTY; }");
383 else {
384 # Go through each field and try to 'aggregate' the tokens into a single 'make_str' where possible
385 $cnt = 0;
386 for ($z = 0; $z < $len; $z++) {
387 if (substr($flds{$z}, 1, 1) eq "\$") {
388 $flds_new{$cnt++} = $flds{$z};
389 next;
392 $str = $flds{$z};
394 while (1) {
395 if ($z >= $len - 1 || substr($flds{$z + 1}, 1, 1) eq "\$") {
396 # We're at the end...
397 $flds_new{$cnt++} = "make_str(\"" . $str . "\")";
398 last;
400 $z++;
401 $str = $str . ' ' . $flds{$z};
405 # So - how many fields did we end up with ?
406 if ($cnt == 1) {
407 # Straight assignement
408 $str = " \$\$ = " . $flds_new{0} . ';';
409 &add_to_buffer('rules', $str);
411 else {
412 # Need to concatenate the results to form
413 # our final string
414 $str = " \$\$ = cat_str(" . $cnt;
416 for ($z = 0; $z < $cnt; $z++) {
417 $str = $str . ',' . $flds_new{$z};
419 $str = $str . ');';
420 &add_to_buffer('rules', $str);
422 if ($literal_mode == 0) {
423 &add_to_buffer('rules', '}');
427 else {
428 # we're in the stmt: rule
429 if ($len) {
430 # or just the statement ...
431 &add_to_buffer('rules', " { output_statement(\$1, 0, ECPGst_normal); }");
433 else {
434 &add_to_buffer('rules', " { \$\$ = NULL; }");
439 sub generate_block {
440 local($line) = @_;
441 $block = $non_term_id . $line;
442 $s = ' ', $block =~ s/$s//g;
443 $s = "\\|", $block =~ s/$s//g;
444 return $block;
447 sub dump_line {
448 local($stmt_mode, $fields, $field_count) = @_;
449 $block = &generate_block($line);
450 if ($replace_line{$block} eq 'ignore') {
451 return 0;
453 elsif ($replace_line{$block}) {
454 if (index($line, '|') != 0) {
455 $line = '| ' . $replace_line{$block};
457 else {
458 $line = $replace_line{$block};
460 $block = &generate_block($line);
462 &add_to_buffer('rules', $line);
463 $i = &include_stuff('rules', 'ecpg.addons', $block, 0, $field_count);
464 if ($i == 0) {
465 &dump_fields($stmt_mode, *fields, $field_count, ' { ');
467 return 1;
470 sub Getline2 {
471 &Pick('',@_);
472 if ($getline_ok = (($_ = <$fh>) ne '')) {
473 chomp; # strip record separator
474 @Fld = split(' ', $_, -1);
479 sub Pick {
480 local($mode,$name,$pipe) = @_;
481 $fh = $name;
482 open($name,$mode.$name.$pipe) unless $opened{$name}++;