switch -O2 to -O1 for zetacom compilation
[xoc.git] / zeta / parse.c
blob8d94ab4a056cb61012c65a1217dee6c7be7740a1
1 // Parsing using grammars.
3 #include "a.h"
4 #include "type.h"
5 #include "runtime.h"
6 #include "grammar.h"
7 #include "listimpl.h"
9 int FLAGS_parsemergedebug = 0;
11 static ParseValue *freepvs;
13 ParseValue*
14 mkParseValue(Ast *ast)
16 ParseValue *pv;
18 if((pv = freepvs) != nil)
19 freepvs = (void*)pv->ast;
20 else
21 pv = emallocnz(sizeof(ParseValue));
22 pv->ref = 1;
23 pv->ast = ast;
24 pv->line.file = 0;
25 pv->line.line = 0;
26 return pv;
29 void
30 pvfree(CfgSym *sym, ParseValue *pv)
32 if(pv == nil || --pv->ref > 0)
33 return;
34 if(pv->ast)
35 astfree(pv->ast);
36 pv->ast = (void*)freepvs;
37 freepvs = pv;
40 void
41 pvdup(CfgSym *sym, ParseValue **a, ParseValue *b)
43 if(*a)
44 (*a)->ref++;
47 #if 0
48 void
49 pvtouch(ParseValue *pv, int pass)
51 if(pv == nil)
52 return;
54 assert(pv->ref > 0);
55 if(pv->ast)
56 assert(pv->ast->ref > 0);
58 switch(pass){
59 case 0:
60 pv->refcheck = 0;
61 if(pv->ast)
62 astrefcheck(pv->ast, 0);
63 break;
64 case 1:
65 if(pv->refcheck++ == 0)
66 if(pv->ast)
67 astrefcheck(pv->ast, 1);
68 break;
69 case 2:
70 if(pv->refcheck != -1){
71 if(pv->ref < pv->refcheck)
72 fprint(2, "%d < %d | PV %p\n", pv->ref, pv->refcheck, pv);
73 if(pv->ast)
74 astrefcheck(pv->ast, 2);
75 pv->refcheck = -1;
77 break;
80 #endif
82 //============================================================
83 // Parsing action invoked during GLR parsing.
85 static Line lastline;
86 Type *parseactiontype;
88 int
89 parseaction(CfgGram *cfg, CfgRule *rule, ParseValue **outp, ParseValue **in)
91 int i, n;
92 Ast **right, *a;
93 ParseValue *out;
95 if(!issubtype(rule->tgram, parseactiontype)){
96 //fprint(2, "rejecting: %R [%T <: %T]\n", rule, rule->tgram, parseactiontype);
97 return -1;
100 right = emalloc(rule->nright*sizeof right[0]);
101 n = 0;
102 for(i=0; i<rule->nright; i++){
103 a = in[i]->ast;
105 // Only save the ASTs that aren't literal strings.
106 // The literal strings can be inferred from the rule.
107 // (They're the "concrete" syntax, kind of.)
108 if(nametostr(rule->right[i]->name)[0] != '"')
109 right[n++] = a;
112 if(n != rule->nsavedright)
113 panic("parseaction - %R %d %d", rule, n, rule->nsavedright);
115 *outp = out = mkParseValue(mkastrule(rule, right));
116 for(i=0; i<rule->nright; i++){
117 if(out->line.file == nil)
118 out->line = in[i]->line;
120 out->ast->line = out->line;
121 if(rule->nright == 0)
122 out->line = lastline;
123 lastline = out->line;
124 return 1;
128 // ========================================================
129 // Parsing of regular text (not patterns)
131 static void lexsharp(RgParse*, char**);
133 // GLR lexer callback. Should get the next token and
134 // return a list of RgToken* representing the various
135 // ways to interpret that token. Each RgToken is a
136 // CfgSym* paired with its corresponding Ast*.
137 // The state, in vstate, is a char** pointing at the
138 // current input location (itself a char*).
139 RgTokenL*
140 lextext(RgParse *parse, CfgGram *cfg, void *v)
142 char **pp, *p, *op;
143 Gram *gram;
144 RgToken *t;
145 CfgSymL *l;
146 ParseValue *pv;
147 int len, sol;
148 char *s;
149 CfgSym *sym;
150 Name name;
151 RgTokenL *tl;
153 pp = v;
154 gram = cfg->ggram;
155 sol = parse->col == 1;
157 // Loop looking for a non-ignorable token.
159 p = *pp;
160 // check EOF
161 if(p == nil || *p == 0){
162 *pp = nil;
163 return mkRgTokenL(rgnewtoken(parse, cfg->eofsym), nil);
166 // process # line directives
167 if(sol)
168 lexsharp(parse, &p);
170 // get next token (actually token set)
171 op = p;
172 l = rundfa(gram->dfa, p, &p);
173 *pp = p;
174 if(p == op || l == nil)
175 return nil;
177 // scan lexed text to update line, column
178 sol = 0;
179 for(s=op; s<p; s++){
180 if(*s == '\n'){
181 parse->line++;
182 parse->col = 1;
183 sol = 1;
184 }else{
185 parse->col++;
186 if(sol && *s != ' ' && *s != '\t')
187 sol = 0;
190 }while(gramignoring(gram, l->hd));
192 len = p - op;
193 name = nameofn(op, len);
194 tl = nil;
195 for(; l; l=l->tl){
196 sym = l->hd;
197 t = rgnewtoken(parse, sym);
198 pv = mkParseValue(mkaststring(sym, name));
199 t->val = pv;
200 pv->line.file = parse->file;
201 pv->line.line = parse->line;
202 tl = mkRgTokenL(t, tl);
204 return revRgTokenL(tl);
207 // Parse the cpp's "# ..." directives, assuming that the current
208 // position is "^". This will eat all sol whitespace and record
209 // line number info (if present). We could do some fancier
210 // parsing of pragmas, but we won't.
211 static void
212 lexsharp(RgParse *parse, char **pp)
214 char *op, buf[512];
215 int line, i;
217 assert(pp);
218 assert(*pp);
219 op = *pp;
220 while(**pp && (**pp == ' ' || **pp == '\t'))
221 (*pp)++;
222 if(**pp != '#')
223 goto done;
224 (*pp)++;
225 // Line number
226 while(**pp && (**pp == ' ' || **pp == '\t'))
227 (*pp)++;
228 line = 0;
229 if(**pp < '0' || '9' < **pp)
230 goto eatline;
231 while(**pp && '0' <= **pp && **pp <= '9'){
232 line = line*10 + (**pp - '0');
233 (*pp)++;
235 parse->line = line - 1; // the \n will add 1
236 // File name
237 while(**pp && (**pp == ' ' || **pp == '\t'))
238 (*pp)++;
239 if(**pp != '"')
240 goto eatline;
241 (*pp)++;
242 for(i=0; **pp && **pp != '"'; i++){
243 if(**pp == '\\'){
244 (*pp)++;
245 if(!**pp)
246 break;
248 if(i >= (int)sizeof buf){
249 buf[sizeof(buf)-1] = '\0';
250 panic("file name too long near '%s'", buf);
252 buf[i] = **pp;
253 (*pp)++;
255 buf[i] = '\0';
256 parse->file = nameof(buf);
258 eatline:
259 while(**pp && **pp != '\n')
260 (*pp)++;
261 done:
262 parse->col += *pp - op;
265 // Two different parses, a and b, have been found for
266 // the same segment of text. Choose which to use
267 // and update av in place. It's worse than that, because
268 // the RNGLR parser can use an Ast before it has been
269 // fully merged. Rather than fix that (a lot of work),
270 // we make glrmerge sneakily use astswap to make sure
271 // that av->ast updates in place. This is only okay because
272 // all the ASTs manipulated were just created in the parser,
273 // so we know that no one else is pointing at them other
274 // than the values created during the parse.
275 void
276 glrmerge(CfgSym *sym, ParseValue *av, ParseValue *bv)
278 Ast *a, *b, *aa, *m;
279 int ta, tb, debug;
280 CfgPrec *aprec, *bprec;
282 a = av->ast;
283 b = bv->ast;
285 ta = -1;
286 tb = -2;
287 debug = 0;
289 // Can get these when some productions can yield empty strings.
290 // For example:
291 // grammar {
292 // S: A S "b";
293 // S: "x";
294 // A: ;
295 // }
296 // `S{xbbbbb};
297 if(a->rule && a->rule == b->rule && a->rule->nright == 0)
298 goto A_wins;
300 aprec = a->rule ? a->rule->prec : nil;
301 bprec = b->rule ? b->rule->prec : nil;
303 // Rule preference resolution.
304 if(aprec && bprec && aprec->kind == CfgPrecPrefer && bprec->kind == aprec->kind){
305 int r = cfgcompareprec(aprec, bprec);
306 if(r != 0)
307 debug = 1;
308 if(r > 0)
309 goto A_wins;
310 if(r < 0)
311 goto B_wins;
314 // Rule priority resolution is done in the action rule
315 // (because the merge-node parent actually does the resolution).
316 // Associativity resolution is also done in the action rule,
317 // so that it applies even if there is no ambiguity.
319 // Ambiguous.
320 if(FLAGS_parsemergedebug)
321 fprint(2,"---- Merge [%p.%d/%p] ----\n%R\n%R\n", aprec, aprec ? aprec->assoc : 0, bprec, a->rule, b->rule);
323 // First make a copy of a at a new location,
324 // since the merged node is going to use a's
325 // location, and we don't want it pointing at itself.
326 aa = mkAst(a->op, a->sym);
327 astswap(aa, a);
329 // Now make the merge node.
330 m = mkastmerge(aa, b);
332 // Now move the merge node into a's location.
333 astswap(a, m);
335 // Free the scraps.
336 astfree(m);
337 astfree(aa);
339 if(FLAGS_parsemergedebug)
340 fprint(2, "%#lA\n----\n", av->ast);
341 return;
343 A_wins:
344 if(FLAGS_parsemergedebug){
345 if(debug == 1)
346 fprint(2,"---- A wins (prefer) ----\n%R\n%R\n", a->rule, b->rule);
347 else if(debug == 2)
348 fprint(2,"---- A wins (assoc) ----\n[%d] %#A\n[%d] %#A\n", ta, a, tb, b);
349 else
350 fprint(2,"---- A wins ----\n%R\n%R\n", a->rule, b->rule);
352 return;
353 B_wins:
354 if(FLAGS_parsemergedebug){
355 if(debug == 1)
356 fprint(2,"---- B wins (prefer) ----\n%R\n%R\n", a->rule, b->rule);
357 else if(debug == 2)
358 fprint(2,"---- B wins (assoc) ----\n[%d] %#A\n[%d] %#A\n", ta, a, tb, b);
359 else
360 fprint(2,"---- B wins ----\n%R\n%R\n", a->rule, b->rule);
362 bv->ast = nil; // now b is ours
363 astswap(a, b);
364 astfree(b);
365 return;
369 Ast*
370 parsetext(Type *tg, Name text)
372 Ast *ast;
373 RgParse *p;
374 char *t;
375 ParseValue *pv;
376 CfgSym *gsym;
378 if(text == nil)
379 text = N("");
381 gsym = tg->sym;
382 p = rgnewparse(gsym->gram);
383 p->file = N("<string>");
384 p->line = 1;
385 p->col = 1;
386 p->merge = glrmerge;
387 p->free = pvfree;
388 p->dup = pvdup;
389 // p->touch = pvtouch;
390 t = nametostr(text);
391 parseactiontype = typegramsym(tg, nil, nil);
392 pv = rgparse(gsym->gram, gsym, p, lextext, &t);
393 parseactiontype = nil;
394 if(pv == nil){
395 // The lexer might have seen some # lines and
396 // updated p->file and p->line.
397 fprint(2, "%s:%d: error parsing, near %.20s\n",
398 p->file ? p->file : N("???"), p->line, t);
399 return nil;
401 if(t != nil){
402 fprint(2, "%s:%d: error parsing, didn't reach eof, near %.20s\n",
403 p->file ? p->file : N("???"), p->line, t);
404 return nil;
406 ast = astleak(pv->ast);
407 // glrfreeparse(p);
409 // XXX validateast ?
411 return ast;