initial
[prop.git] / prop-src / lexeme.cc
blob647ea397b597e281e1d28294573230b6806a3ba7
1 ///////////////////////////////////////////////////////////////////////////////
2 // This file is generated automatically using Prop (version 2.3.6),
3 // last updated on Nov 2, 1999.
4 // The original source file is "lexeme.pcc".
5 ///////////////////////////////////////////////////////////////////////////////
7 #line 1 "lexeme.pcc"
8 ///////////////////////////////////////////////////////////////////////////////
9 //
10 // This file implements some support routines for lexeme and regular
11 // expressions handling.
13 ///////////////////////////////////////////////////////////////////////////////
14 #include <string.h>
15 #include <AD/strings/charesc.h>
16 #include "ir.h"
17 #include "ast.h"
18 #include "hashtab.h"
19 #include "type.h"
20 #include "matchcom.h"
22 ///////////////////////////////////////////////////////////////////////////////
23 // Properly quote a string
24 ///////////////////////////////////////////////////////////////////////////////
25 const char * make_quoted_string (const char * s)
26 { char * new_str = str_pool[strlen(s) * 4];
27 *new_str = '"';
28 char * end_str = quote_string(new_str+1,s);
29 *end_str++ = '"';
30 *end_str = '\0';
31 return new_str;
34 ///////////////////////////////////////////////////////////////////////////////
35 // Add contexts to a string/regexp pattern
36 ///////////////////////////////////////////////////////////////////////////////
37 Pat add_contexts (Conses contexts, Pat pat)
38 { if (contexts ==
39 #line 31 "lexeme.pcc"
40 #line 31 "lexeme.pcc"
41 nil_1_
42 #line 31 "lexeme.pcc"
43 #line 31 "lexeme.pcc"
44 ) return pat;
46 #line 32 "lexeme.pcc"
47 #line 74 "lexeme.pcc"
49 if (pat) {
50 switch (pat->tag__) {
51 case a_Pat::tag_LITERALpat: {
52 switch (((Pat_LITERALpat *)pat)->LITERALpat->tag__) {
53 case a_Literal::tag_STRINGlit: {
54 #line 53 "lexeme.pcc"
55 return add_contexts(contexts,LITERALpat(REGEXPlit(convert_regexp(((Literal_STRINGlit *)((Pat_LITERALpat *)pat)->LITERALpat)->STRINGlit))));
57 #line 54 "lexeme.pcc"
58 } break;
59 case a_Literal::tag_REGEXPlit: {
60 #line 34 "lexeme.pcc"
61 char buffer[4096];
62 char * p = buffer;
63 *p++ = '/'; *p++ = '<';
64 for (Conses l = contexts; l; l = l->_2)
65 { if (l->_1) strcpy(p,l->_1->name);
66 p += strlen(p);
67 if (l->_2) *p++ = ',';
69 *p++ = '>';
70 strcpy(p,((Literal_REGEXPlit *)((Pat_LITERALpat *)pat)->LITERALpat)->REGEXPlit+1); p += strlen(p) - 1;
71 *p = '/';
72 debug_msg("converting %p into %s\n",
73 CONTEXTpat(contexts,pat), buffer);
74 Pat new_pat = LITERALpat(REGEXPlit(str_pool[buffer]));
75 new_pat->selector = pat->selector;
76 new_pat->ty = pat->ty;
77 return new_pat;
79 #line 51 "lexeme.pcc"
80 } break;
81 default: {
82 L1:;
83 #line 71 "lexeme.pcc"
84 error("%Lillegal context(s) in pattern %p\n",
85 CONTEXTpat(contexts,pat));
86 return pat;
88 #line 74 "lexeme.pcc"
89 } break;
91 } break;
92 case a_Pat::tag_LEXEMEpat: {
93 #line 56 "lexeme.pcc"
94 Pat this_pat = expand_lexeme_pat(pat,((Pat_LEXEMEpat *)pat)->_2,((Pat_LEXEMEpat *)pat)->_3,((Pat_LEXEMEpat *)pat)->_4);
95 Pat new_pat = NOpat;
97 #line 58 "lexeme.pcc"
98 #line 64 "lexeme.pcc"
100 for (;;) {
101 if (this_pat) {
102 switch (this_pat->tag__) {
103 case a_Pat::tag_LOGICALpat: {
104 switch (((Pat_LOGICALpat *)this_pat)->_1) {
105 case ORpat: {
106 #line 60 "lexeme.pcc"
107 Pat one_pat = add_contexts(contexts,((Pat_LOGICALpat *)this_pat)->_3);
108 new_pat = new_pat == NOpat ? one_pat
109 : LOGICALpat(ORpat,one_pat,new_pat);
110 this_pat = ((Pat_LOGICALpat *)this_pat)->_2;
112 #line 64 "lexeme.pcc"
113 } break;
114 default: { goto L2; } break;
116 } break;
117 default: { goto L2; } break;
119 } else { goto L2; }
121 L2:;
123 #line 65 "lexeme.pcc"
124 #line 65 "lexeme.pcc"
126 Pat p = add_contexts(contexts,this_pat);
127 new_pat = new_pat == NOpat ? p : LOGICALpat(ORpat,p,new_pat);
128 return new_pat;
130 #line 69 "lexeme.pcc"
131 } break;
132 default: { goto L1; } break;
134 } else { goto L1; }
136 #line 75 "lexeme.pcc"
137 #line 75 "lexeme.pcc"
141 ///////////////////////////////////////////////////////////////////////////////
142 // Convert a string pattern into a regexp pattern if necessary
143 ///////////////////////////////////////////////////////////////////////////////
144 #line 81 "lexeme.pcc"
145 #line 85 "lexeme.pcc"
146 Pat convert_regexp (Pat x_1);
147 Pat convert_regexp (Pat x_1)
149 if (x_1) {
150 switch (x_1->tag__) {
151 case a_Pat::tag_LITERALpat: {
152 switch (((Pat_LITERALpat *)x_1)->LITERALpat->tag__) {
153 case a_Literal::tag_STRINGlit: {
154 #line 82 "lexeme.pcc"
155 return LITERALpat(REGEXPlit(convert_regexp(((Literal_STRINGlit *)((Pat_LITERALpat *)x_1)->LITERALpat)->STRINGlit)));
156 #line 82 "lexeme.pcc"
157 } break;
158 default: {
159 L3:;
160 #line 84 "lexeme.pcc"
161 return x_1;
162 #line 84 "lexeme.pcc"
163 } break;
165 } break;
166 default: { goto L3; } break;
168 } else { goto L3; }
170 #line 85 "lexeme.pcc"
171 #line 85 "lexeme.pcc"
174 ///////////////////////////////////////////////////////////////////////////////
175 // Convert a string literal into a regular expression literal.
176 ///////////////////////////////////////////////////////////////////////////////
177 const char * convert_regexp (const char * string, Bool quotes)
178 { char buf[4096];
179 register char * p = buf;
180 register const char * s = string + 1;
181 if (quotes) *p++ = '/';
182 while (*s) {
183 char c;
184 s = parse_char(s,c);
186 #line 98 "lexeme.pcc"
187 #line 103 "lexeme.pcc"
189 switch (c) {
190 case '$':
191 case '(':
192 case ')':
193 case '*':
194 case '+':
195 case '.':
196 case '/':
197 case '<':
198 case '>':
199 case '?':
200 case '[':
201 case '\\':
202 case ']':
203 case '^':
204 case '{':
205 case '|':
206 case '}': {
207 L4:;
208 #line 102 "lexeme.pcc"
209 *p++ = '\\'; *p++ = c;
210 #line 102 "lexeme.pcc"
211 } break;
212 default: {
213 #line 103 "lexeme.pcc"
214 p = print_char(p,c);
215 #line 103 "lexeme.pcc"
219 #line 104 "lexeme.pcc"
220 #line 104 "lexeme.pcc"
223 p -= 2;
224 if (quotes) *p++ = '/';
225 *p = '\0';
226 debug_msg("converting %s into %s\n", string, buf);
227 return str_pool[buf];
230 ///////////////////////////////////////////////////////////////////////////////
231 // The global lexeme and lexeme class environments
232 ///////////////////////////////////////////////////////////////////////////////
233 static HashTable lexeme_env(string_hash, string_equal);
234 static HashTable lexeme_class_env(string_hash, string_equal);
236 ///////////////////////////////////////////////////////////////////////////////
237 // Method to update or lookup from the lexeme class environment
238 ///////////////////////////////////////////////////////////////////////////////
239 void update_lexeme_class (Id id, TermDefs terms)
240 { if (lexeme_class_env.contains(id))
241 { error ("%Lredefinition of lexeme class %s\n", id); }
242 else
243 { lexeme_class_env.insert(id, terms); }
246 TermDefs lookup_lexeme_class (Id id)
247 { HashTable::Entry * e = lexeme_class_env.lookup(id);
248 if (e) return TermDefs(e->v);
249 else { error("%Llexeme class %s is undefined\n", id); return
250 #line 132 "lexeme.pcc"
251 #line 132 "lexeme.pcc"
252 nil_1_
253 #line 132 "lexeme.pcc"
254 #line 132 "lexeme.pcc"
258 ///////////////////////////////////////////////////////////////////////////////
259 // Method to expand a pattern, taking lexeme definitions into account.
260 ///////////////////////////////////////////////////////////////////////////////
261 Pat mk_regexp_pat(const char * re)
262 { const char * p = re;
263 char buf[4096];
264 char * q = buf;
265 if (strchr(re,'{') != 0)
267 // skip context
268 if (*p == '<')
269 { do { *q++ = *p; } while (*p && *p++ != '>'); }
271 #line 147 "lexeme.pcc"
272 #line 189 "lexeme.pcc"
274 for (;;) {
275 char _V1 = (*p);
276 switch (_V1) {
277 case '\000': { goto L5; } break;
278 case '[': {
279 #line 148 "lexeme.pcc"
280 do { *q++ = *p++; }
281 while (p[-1] && (p[-2] == '\\' || p[-1] != ']'));
283 #line 150 "lexeme.pcc"
284 } break;
285 case '{': {
286 #line 151 "lexeme.pcc"
287 char name[256]; char * r;
288 for (r = name, p++; *p && *p != '}';) *r++ = *p++;
289 if (*p == '}') p++;
290 else error ("%Lmissing '}' in regular expression %s\n",re);
291 *r = '\0';
292 HashTable::Entry * e = lexeme_env.lookup(name);
293 if (e)
294 { Pat pattern = (Pat)e->v;
296 #line 159 "lexeme.pcc"
297 #line 169 "lexeme.pcc"
299 if (pattern) {
300 switch (pattern->tag__) {
301 case a_Pat::tag_LITERALpat: {
302 switch (((Pat_LITERALpat *)pattern)->LITERALpat->tag__) {
303 case a_Literal::tag_STRINGlit: {
304 #line 165 "lexeme.pcc"
305 *q++ = '('; strcpy(q, convert_regexp(((Literal_STRINGlit *)((Pat_LITERALpat *)pattern)->LITERALpat)->STRINGlit,false));
306 q += strlen(q) - 1; *q++ = ')';
308 #line 167 "lexeme.pcc"
309 } break;
310 case a_Literal::tag_REGEXPlit: {
311 #line 161 "lexeme.pcc"
312 *q++ = '('; strcpy(q,((Literal_REGEXPlit *)((Pat_LITERALpat *)pattern)->LITERALpat)->REGEXPlit+1); q += strlen(((Literal_REGEXPlit *)((Pat_LITERALpat *)pattern)->LITERALpat)->REGEXPlit)-2;
313 *q++ = ')';
315 #line 163 "lexeme.pcc"
316 } break;
317 default: {
318 L6:; } break;
320 } break;
321 default: { goto L6; } break;
323 } else { goto L6; }
325 #line 169 "lexeme.pcc"
326 #line 169 "lexeme.pcc"
328 } else
329 { error ("%Llexeme {%s} is undefined in %s\n", name, re);
332 #line 173 "lexeme.pcc"
333 } break;
334 default: {
335 #line 174 "lexeme.pcc"
336 char ch;
337 const char * r = parse_char(p,ch);
338 // if (*p == '\\') *q++ = '\\';
339 if (*p == '\\')
341 #line 178 "lexeme.pcc"
342 #line 184 "lexeme.pcc"
344 switch (ch) {
345 case '$':
346 case '(':
347 case ')':
348 case '*':
349 case '+':
350 case '.':
351 case '/':
352 case '<':
353 case '>':
354 case '?':
355 case '[':
356 case '\\':
357 case ']':
358 case '^':
359 case '{':
360 case '|':
361 case '}': {
362 L7:;
363 #line 182 "lexeme.pcc"
364 *q++ = '\\'; *q++ = ch;
366 #line 183 "lexeme.pcc"
367 } break;
368 default: {
369 #line 184 "lexeme.pcc"
370 q = print_char(q,ch);
371 #line 184 "lexeme.pcc"
375 #line 185 "lexeme.pcc"
376 #line 185 "lexeme.pcc"
378 } else
379 { q = print_char(q,ch); }
380 p = r;
382 #line 189 "lexeme.pcc"
386 L5:;
388 #line 190 "lexeme.pcc"
389 #line 190 "lexeme.pcc"
391 *q++;
392 debug_msg("%s ==> %s\n", re, buf);
393 re = buf;
395 return LITERALpat(REGEXPlit(str_pool[re]));
398 ///////////////////////////////////////////////////////////////////////////////
399 // Method to update the lexeme environment.
400 ///////////////////////////////////////////////////////////////////////////////
401 void update_lexeme(Id id, Ids args, Pat pat)
402 { HashTable::Entry * e = lexeme_env.lookup(id);
403 if (e)
404 { error("%Llexeme {%s} already defined as %p\n",id,(Pat)e->v);
405 } else
406 { lexeme_env.insert(id,pat);
410 ///////////////////////////////////////////////////////////////////////////////
411 // Method to expand a lexeme pattern into a set of disjunctive patterns.
412 ///////////////////////////////////////////////////////////////////////////////
413 Pat expand_lexeme_pat(Pat pat, Ty ty, int n, Cons terms[])
415 #line 214 "lexeme.pcc"
416 #line 237 "lexeme.pcc"
418 Ty _V2 = deref_all(ty);
419 if (_V2) {
420 switch (_V2->tag__) {
421 case a_Ty::tag_TYCONty: {
422 if (boxed(((Ty_TYCONty *)_V2)->_1)) {
423 switch (((Ty_TYCONty *)_V2)->_1->tag__) {
424 case a_TyCon::tag_DATATYPEtycon: {
425 if (
426 #line 216 "lexeme.pcc"
427 ((((TyCon_DATATYPEtycon *)((Ty_TYCONty *)_V2)->_1)->qualifiers | QUALlexeme) == 0)
428 #line 216 "lexeme.pcc"
431 #line 217 "lexeme.pcc"
432 error ("%Ldatatype %T is not a lexeme type\n", ty);
433 #line 217 "lexeme.pcc"
434 } else {
436 #line 219 "lexeme.pcc"
437 Pat disj_pats = NOpat;
438 for (int i = 0; i < n; i++)
440 #line 221 "lexeme.pcc"
441 #line 233 "lexeme.pcc"
443 Cons _V3 = terms[i];
444 if (_V3) {
445 if (_V3->lexeme_pattern) {
446 #line 223 "lexeme.pcc"
447 Pat new_pat = subst(convert_regexp(_V3->lexeme_pattern),0,true);
448 disj_pats = (disj_pats == NOpat) ? new_pat
449 : LOGICALpat(ORpat,disj_pats, new_pat);
450 disj_pats->selector = pat->selector;
452 #line 227 "lexeme.pcc"
453 } else {
454 #line 229 "lexeme.pcc"
455 error ("%Llexeme pattern is undefined for constructor %s\n",
456 _V3->name);
458 #line 231 "lexeme.pcc"
460 } else {}
462 #line 233 "lexeme.pcc"
463 #line 233 "lexeme.pcc"
466 return disj_pats;
468 #line 236 "lexeme.pcc"
470 } break;
471 default: {
472 L8:;
473 #line 237 "lexeme.pcc"
474 error ("%Lnon lexeme type %T in pattern %p\n", ty, pat);
475 #line 237 "lexeme.pcc"
476 } break;
478 } else { goto L8; }
479 } break;
480 default: { goto L8; } break;
482 } else {}
484 #line 238 "lexeme.pcc"
485 #line 238 "lexeme.pcc"
487 return WILDpat();
489 #line 241 "lexeme.pcc"
491 ------------------------------- Statistics -------------------------------
492 Merge matching rules = yes
493 Number of DFA nodes merged = 197
494 Number of ifs generated = 9
495 Number of switches generated = 13
496 Number of labels = 6
497 Number of gotos = 8
498 Adaptive matching = enabled
499 Fast string matching = disabled
500 Inline downcasts = enabled
501 --------------------------------------------------------------------------