1 ///////////////////////////////////////////////////////////////////////////////
3 // This file implements some support routines for lexeme and regular
4 // expression handling.
6 ///////////////////////////////////////////////////////////////////////////////
8 #include <AD/strings/charesc.h>
15 ///////////////////////////////////////////////////////////////////////////////
16 // Properly quote a string
17 ///////////////////////////////////////////////////////////////////////////////
// Produces a quoted form of `s` in storage obtained from str_pool.
// str_pool is indexed with strlen(s) * 4 (presumably a byte count for a fresh
// buffer -- TODO confirm) and quote_string() writes starting one character
// into that buffer, leaving the first slot untouched by this call.
// NOTE(review): the remainder of the routine is not visible in this excerpt.
// Presumably the free first slot and the position returned by quote_string()
// receive the surrounding quote characters; if so, confirm that
// strlen(s) * 4 still leaves room for them and a terminator when `s` is empty.
18 const char * make_quoted_string (const char * s)
19 { char * new_str = str_pool[strlen(s) * 4];
// quote_string() hands back a char* that is kept in end_str; presumably it
// marks the end of the text it wrote -- TODO confirm.
21 char * end_str = quote_string(new_str+1,s);
27 ///////////////////////////////////////////////////////////////////////////////
28 // Add contexts to a string/regexp pattern
29 ///////////////////////////////////////////////////////////////////////////////
// Attaches a list of lexer contexts to a pattern and returns the result.
// An empty `contexts` list (#[]) returns `pat` unchanged. The arms visible in
// this excerpt do the following:
//   - build a new regexp text that starts with "/<", lists the context names
//     separated by commas, and then appends the text of `re` (presumably the
//     regexp-literal arm; its header is not visible here -- TODO confirm);
//   - convert a string `s` with convert_regexp() and recurse on the result;
//   - expand a LEXEMEpat with expand_lexeme_pat() and add the contexts to the
//     pieces of the resulting OR pattern;
//   - report "illegal context(s)" through error().
30 Pat add_contexts (Conses contexts, Pat pat)
31 { if (contexts == #[]) return pat;
// Open the rewritten regexp with "/<".
36 *p++ = '/'; *p++ = '<';
// Walk the context list: the loop reads l->#1 and advances through l->#2.
37 for (Conses l = contexts; l; l = l->#2)
// Copy the context's name to the write position (skipped when l->#1 is null).
38 { if (l->#1) strcpy(p,l->#1->name);
// Emit a comma only when another list cell follows.
40 if (l->#2) *p++ = ',';
// Append `re` without its first character, then leave p on the last
// character that was just copied.
43 strcpy(p,re+1); p += strlen(p) - 1;
// Trace the rewrite: original pattern with its contexts, then the new text.
45 debug_msg("converting %p into %s\n",
46 CONTEXTpat(contexts,pat), buffer);
// Wrap the rewritten text (passed through str_pool) as a regexp literal
// pattern and carry over the selector and type of the original pattern.
47 Pat new_pat = LITERALpat(REGEXPlit(str_pool[buffer]));
48 new_pat->selector = pat->selector;
49 new_pat->ty = pat->ty;
// Convert the string `s` to regexp form and retry with the converted literal.
53 { return add_contexts(contexts,LITERALpat(REGEXPlit(convert_regexp(s))));
// Lexeme pattern: expand it first, then add the contexts to the expansion.
55 | LEXEMEpat(_, ty, n, cs):
56 { Pat this_pat = expand_lexeme_pat(pat,ty,n,cs);
// For an OR node, add the contexts to operand `b` and fold the result into
// new_pat (it becomes new_pat itself while new_pat is still NOpat).
58 match while (this_pat)
59 { LOGICALpat(ORpat,a,b):
60 { Pat one_pat = add_contexts(contexts,b);
61 new_pat = new_pat == NOpat ? one_pat
62 : LOGICALpat(ORpat,one_pat,new_pat);
// Add the contexts to this_pat itself and fold it into new_pat the same way.
66 Pat p = add_contexts(contexts,this_pat);
67 new_pat = new_pat == NOpat ? p : LOGICALpat(ORpat,p,new_pat);
// Report the offending pattern together with its contexts.
71 { error("%Lillegal context(s) in pattern %p\n",
72 CONTEXTpat(contexts,pat));
78 ///////////////////////////////////////////////////////////////////////////////
79 // Convert a string pattern into a regexp pattern if necessary
80 ///////////////////////////////////////////////////////////////////////////////
// Pattern-level form of convert_regexp (a Prop `fun` definition yielding Pat).
// For a STRINGpat carrying `s`, the result is a regexp literal pattern built
// from the string-level convert_regexp(s). Any further cases of this function
// lie outside this excerpt.
81 fun convert_regexp STRINGpat s: Pat:
82 { return LITERALpat(REGEXPlit(convert_regexp(s))); }
87 ///////////////////////////////////////////////////////////////////////////////
88 // Convert a string literal into a regular expression literal.
89 ///////////////////////////////////////////////////////////////////////////////
// Rewrites the text of `string` into regular-expression form inside `buf`
// and returns str_pool[buf].
//   string - source text; reading starts at its second character.
//   quotes - when true, a '/' is written before and after the converted text.
// Characters matching one of the listed regexp metacharacters are emitted with
// a preceding backslash; every other character goes through print_char().
// NOTE(review): the declaration of `buf` and the loop that fetches `c` are not
// visible in this excerpt, so the buffer size and stop condition are unknown.
90 const char * convert_regexp (const char * string, Bool quotes)
92 register char * p = buf;
// Skip the first character of the input (presumably the opening quote of the
// string literal -- TODO confirm).
93 register const char * s = string + 1;
94 if (quotes) *p++ = '/';
98 match (c) // escape all meta characters
99 { '\\' || '[' || ']' || '*' || '+' || '(' || ')' || '$' ||
100 '^' || '?' || '.' || '{' || '}' || '/' || '<' || '>' ||
102 { *p++ = '\\'; *p++ = c; }
103 | _: { p = print_char(p,c); }
107 if (quotes) *p++ = '/';
// Trace the conversion: original text, then the converted buffer.
109 debug_msg("converting %s into %s\n", string, buf);
110 return str_pool[buf];
113 ///////////////////////////////////////////////////////////////////////////////
114 // The global lexeme and lexeme class environments
115 ///////////////////////////////////////////////////////////////////////////////
// File-local tables, both built with string_hash / string_equal.
// lexeme_env: filled by update_lexeme(id, pat); entries are read back as Pat.
116 static HashTable lexeme_env(string_hash, string_equal);
// lexeme_class_env: filled by update_lexeme_class(id, terms); entries are read
// back as TermDefs.
117 static HashTable lexeme_class_env(string_hash, string_equal);
119 ///////////////////////////////////////////////////////////////////////////////
120 // Methods to update or look up the lexeme class environment
121 ///////////////////////////////////////////////////////////////////////////////
// Records `terms` under the lexeme class name `id` in lexeme_class_env.
// If `id` is already present, a "redefinition of lexeme class" error is
// reported through error().
// NOTE(review): the line between the two blocks below is not visible in this
// excerpt; presumably it is an `else`, so the insert only happens for a new
// id -- confirm.
122 void update_lexeme_class (Id id, TermDefs terms)
123 { if (lexeme_class_env.contains(id))
124 { error ("%Lredefinition of lexeme class %s\n", id); }
126 { lexeme_class_env.insert(id, terms); }
// Looks up the lexeme class `id` in lexeme_class_env.
// Returns the stored value converted to TermDefs when an entry exists.
// Otherwise reports "lexeme class ... is undefined" through error() and
// returns the empty list #[].
129 TermDefs lookup_lexeme_class (Id id)
130 { HashTable::Entry * e = lexeme_class_env.lookup(id);
131 if (e) return TermDefs(e->v);
132 else { error("%Llexeme class %s is undefined\n", id); return #[]; }
135 ///////////////////////////////////////////////////////////////////////////////
136 // Method to expand a pattern, taking lexeme definitions into account.
137 ///////////////////////////////////////////////////////////////////////////////
// Builds a regexp literal pattern from the regexp text `re`.
// When `re` contains a '{', the text is scanned and rewritten through the
// write pointer `q`: bracketed character classes are copied verbatim, {name}
// references are replaced by the parenthesised text of the lexeme stored
// under `name` in lexeme_env, and other characters are re-emitted (escaped
// with a backslash when they are regexp metacharacters).
// Errors reported through error(): a missing '}' and an undefined lexeme name.
// NOTE(review): many lines of this routine (including the declaration of the
// output buffer and the main loop header) are not visible in this excerpt.
138 Pat mk_regexp_pat(const char * re)
139 { const char * p = re;
// Only texts containing '{' need the rewrite below.
142 if (strchr(re,'{') != 0)
// Copy characters up to and including the first '>'; if none is found the
// copy stops right after the terminating NUL has been copied.
146 { do { *q++ = *p; } while (*p && *p++ != '>'); }
// Character class: copy verbatim until a ']' that is not preceded by a
// backslash (or the terminating NUL) has been copied.
// NOTE(review): p[-2] is read on the first pass as well, i.e. the character
// before the '[' -- confirm '[' can never be the first character of `re`.
148 { '[': { do { *q++ = *p++; }
149 while (p[-1] && (p[-2] == '\\' || p[-1] != ']'));
// Lexeme reference: collect the characters between '{' and '}' into `name`.
// NOTE(review): the copy loop has no bound check against name[256]; a
// reference longer than the buffer would overrun it.
151 | '{': { char name[256]; char * r;
152 for (r = name, p++; *p && *p != '}';) *r++ = *p++;
154 else error ("%Lmissing '}' in regular expression %s\n",re);
// Resolve the collected name in the lexeme environment.
156 HashTable::Entry * e = lexeme_env.lookup(name);
158 { Pat pattern = (Pat)e->v;
// Regexp lexeme: emit '(' then the lexeme text without its first character;
// q is left on the last copied character of that text.
160 { LITERALpat(REGEXPlit re):
161 { *q++ = '('; strcpy(q,re+1); q += strlen(re)-2;
// String lexeme: emit '(' then the converted string (no '/' delimiters);
// the last copied character is then overwritten with ')'.
164 | LITERALpat(STRINGlit s):
165 { *q++ = '('; strcpy(q, convert_regexp(s,false));
166 q += strlen(q) - 1; *q++ = ')';
171 { error ("%Llexeme {%s} is undefined in %s\n", name, re);
// parse_char() is given the read position and `ch`; presumably it decodes one
// (possibly escaped) character into ch and returns the next read position
// -- TODO confirm.
175 const char * r = parse_char(p,ch);
176 // if (*p == '\\') *q++ = '\\';
178 { match (ch) // escape all meta characters
179 { '\\' || '[' || ']' || '*' || '+' || '(' || ')' ||
180 '$' || '^' || '?' || '.' || '{' || '}' || '/' ||
182 { *q++ = '\\'; *q++ = ch;
184 | _: { q = print_char(q,ch); }
187 { q = print_char(q,ch); }
// Trace the rewrite: original text, then the rewritten buffer.
192 debug_msg("%s ==> %s\n", re, buf);
// The result wraps str_pool[re] as a regexp literal pattern.
195 return LITERALpat(REGEXPlit(str_pool[re]));
198 ///////////////////////////////////////////////////////////////////////////////
199 // Method to update the lexeme environment.
200 ///////////////////////////////////////////////////////////////////////////////
// Records the pattern `pat` under the lexeme name `id` in lexeme_env.
// The table is consulted first; the "already defined" error prints the
// previously stored pattern, so it presumably fires when the lookup finds an
// entry (the condition itself is on a line not visible here -- TODO confirm).
// `args` is not referenced on any line visible in this excerpt.
201 void update_lexeme(Id id, Ids args, Pat pat)
202 { HashTable::Entry * e = lexeme_env.lookup(id);
204 { error("%Llexeme {%s} already defined as %p\n",id,(Pat)e->v);
206 { lexeme_env.insert(id,pat);
210 ///////////////////////////////////////////////////////////////////////////////
211 // Method to expand a lexeme pattern into a set of disjunctive patterns.
212 ///////////////////////////////////////////////////////////////////////////////
// Expands a lexeme pattern into a disjunction (ORpat chain) of patterns.
//   pat   - the pattern being expanded; its selector is copied onto the
//           disjunction as it is built.
//   ty    - the type of the pattern; it is dereferenced with deref_all()
//           and must be a datatype carrying the lexeme qualifier.
//   n     - number of entries examined by the loop.
//   terms - constructor array (presumably the items matched inside the loop;
//           the match header is on a line not visible here -- TODO confirm).
// Errors reported through error(): a datatype without the lexeme qualifier,
// a constructor without a lexeme pattern, and a non-datatype type.
213 Pat expand_lexeme_pat(Pat pat, Ty ty, int n, Cons terms[])
214 { match (deref_all(ty))
// Reject datatypes whose lexeme qualifier bit is not set. The bit must be
// tested with '&': OR-ing a non-zero flag into `qualifiers` can never yield 0,
// which made this diagnostic unreachable.
216 | DATATYPEty({ qualifiers ... },_) | ((qualifiers & QUALlexeme) == 0):
217 { error ("%Ldatatype %T is not a lexeme type\n", ty); }
218 | DATATYPEty({ ... },_):
219 { Pat disj_pats = NOpat;
220 for (int i = 0; i < n; i++)
// Constructor with a lexeme pattern: convert it to regexp form, run it
// through subst(), and OR it onto the disjunction built so far.
222 { ONEcons { lexeme_pattern = p as ! NOpat ... }:
223 { Pat new_pat = subst(convert_regexp(p),0,true);
224 disj_pats = (disj_pats == NOpat) ? new_pat
225 : LOGICALpat(ORpat,disj_pats, new_pat);
// Keep the selector of the original pattern on the current disjunction root.
226 disj_pats->selector = pat->selector;
// Constructor without a lexeme pattern: report it by name.
228 | ONEcons { name ... }:
229 { error ("%Llexeme pattern is undefined for constructor %s\n",
237 | _: { error ("%Lnon lexeme type %T in pattern %p\n", ty, pat); }