1 // Parsing using grammars.
9 int FLAGS_parsemergedebug
= 0;
11 static ParseValue
*freepvs
;
14 mkParseValue(Ast
*ast
)
18 if((pv
= freepvs
) != nil
)
19 freepvs
= (void*)pv
->ast
;
21 pv
= emallocnz(sizeof(ParseValue
));
30 pvfree(CfgSym
*sym
, ParseValue
*pv
)
32 if(pv
== nil
|| --pv
->ref
> 0)
36 pv
->ast
= (void*)freepvs
;
41 pvdup(CfgSym
*sym
, ParseValue
**a
, ParseValue
*b
)
49 pvtouch(ParseValue
*pv
, int pass
)
56 assert(pv
->ast
->ref
> 0);
62 astrefcheck(pv
->ast
, 0);
65 if(pv
->refcheck
++ == 0)
67 astrefcheck(pv
->ast
, 1);
70 if(pv
->refcheck
!= -1){
71 if(pv
->ref
< pv
->refcheck
)
72 fprint(2, "%d < %d | PV %p\n", pv
->ref
, pv
->refcheck
, pv
);
74 astrefcheck(pv
->ast
, 2);
82 //============================================================
83 // Parsing action invoked during GLR parsing.
86 Type
*parseactiontype
;
89 parseaction(CfgGram
*cfg
, CfgRule
*rule
, ParseValue
**outp
, ParseValue
**in
)
95 if(!issubtype(rule
->tgram
, parseactiontype
)){
96 //fprint(2, "rejecting: %R [%T <: %T]\n", rule, rule->tgram, parseactiontype);
100 right
= emalloc(rule
->nright
*sizeof right
[0]);
102 for(i
=0; i
<rule
->nright
; i
++){
105 // Only save the ASTs that aren't literal strings.
106 // The literal strings can be inferred from the rule.
107 // (They're the "concrete" syntax, kind of.)
108 if(nametostr(rule
->right
[i
]->name
)[0] != '"')
112 if(n
!= rule
->nsavedright
)
113 panic("parseaction - %R %d %d", rule
, n
, rule
->nsavedright
);
115 *outp
= out
= mkParseValue(mkastrule(rule
, right
));
116 for(i
=0; i
<rule
->nright
; i
++){
117 if(out
->line
.file
== nil
)
118 out
->line
= in
[i
]->line
;
120 out
->ast
->line
= out
->line
;
121 if(rule
->nright
== 0)
122 out
->line
= lastline
;
123 lastline
= out
->line
;
128 // ========================================================
129 // Parsing of regular text (not patterns)
131 static void lexsharp(RgParse
*, char**);
133 // GLR lexer callback. Should get the next token and
134 // return a list of RgToken* representing the various
135 // ways to interpret that token. Each RgToken is a
136 // CfgSym* paired with its corresponding Ast*.
137 // The state, in vstate, is a char** pointing at the
138 // current input location (itself a char*).
140 lextext(RgParse
*parse
, CfgGram
*cfg
, void *v
)
155 sol
= parse
->col
== 1;
157 // Loop looking for a non-ignorable token.
161 if(p
== nil
|| *p
== 0){
163 return mkRgTokenL(rgnewtoken(parse
, cfg
->eofsym
), nil
);
166 // process # line directives
170 // get next token (actually token set)
172 l
= rundfa(gram
->dfa
, p
, &p
);
174 if(p
== op
|| l
== nil
)
177 // scan lexed text to update line, column
186 if(sol
&& *s
!= ' ' && *s
!= '\t')
190 }while(gramignoring(gram
, l
->hd
));
193 name
= nameofn(op
, len
);
197 t
= rgnewtoken(parse
, sym
);
198 pv
= mkParseValue(mkaststring(sym
, name
));
200 pv
->line
.file
= parse
->file
;
201 pv
->line
.line
= parse
->line
;
202 tl
= mkRgTokenL(t
, tl
);
204 return revRgTokenL(tl
);
207 // Parse the cpp's "# ..." directives, assuming that the current
208 // position is "^". This will eat all sol whitespace and record
209 // line number info (if present). We could do some fancier
210 // parsing of pragmas, but we won't.
212 lexsharp(RgParse
*parse
, char **pp
)
220 while(**pp
&& (**pp
== ' ' || **pp
== '\t'))
226 while(**pp
&& (**pp
== ' ' || **pp
== '\t'))
229 if(**pp
< '0' || '9' < **pp
)
231 while(**pp
&& '0' <= **pp
&& **pp
<= '9'){
232 line
= line
*10 + (**pp
- '0');
235 parse
->line
= line
- 1; // the \n will add 1
237 while(**pp
&& (**pp
== ' ' || **pp
== '\t'))
242 for(i
=0; **pp
&& **pp
!= '"'; i
++){
248 if(i
>= (int)sizeof buf
){
249 buf
[sizeof(buf
)-1] = '\0';
250 panic("file name too long near '%s'", buf
);
256 parse
->file
= nameof(buf
);
259 while(**pp
&& **pp
!= '\n')
262 parse
->col
+= *pp
- op
;
265 // Two different parses, a and b, have been found for
266 // the same segment of text. Choose which to use
267 // and update av in place. It's worse than that, because
268 // the RNGLR parser can use an Ast before it has been
269 // fully merged. Rather than fix that (a lot of work),
270 // we make glrmerge sneakily use astswap to make sure
271 // that av->ast updates in place. This is only okay because
272 // all the ASTs manipulated were just created in the parser,
273 // so we know that no one else is pointing at them other
274 // than the values created during the parse.
276 glrmerge(CfgSym
*sym
, ParseValue
*av
, ParseValue
*bv
)
280 CfgPrec
*aprec
, *bprec
;
289 // Can get these when some productions can yield empty strings.
297 if(a
->rule
&& a
->rule
== b
->rule
&& a
->rule
->nright
== 0)
300 aprec
= a
->rule
? a
->rule
->prec
: nil
;
301 bprec
= b
->rule
? b
->rule
->prec
: nil
;
303 // Rule preference resolution.
304 if(aprec
&& bprec
&& aprec
->kind
== CfgPrecPrefer
&& bprec
->kind
== aprec
->kind
){
305 int r
= cfgcompareprec(aprec
, bprec
);
314 // Rule priority resolution is done in the action rule
315 // (because the merge-node parent actually does the resolution).
316 // Associativity resolution is also done in the action rule,
317 // so that it applies even if there is no ambiguity.
320 if(FLAGS_parsemergedebug
)
321 fprint(2,"---- Merge [%p.%d/%p] ----\n%R\n%R\n", aprec
, aprec
? aprec
->assoc
: 0, bprec
, a
->rule
, b
->rule
);
323 // First make a copy of a at a new location,
324 // since the merged node is going to use a's
325 // location, and we don't want it pointing at itself.
326 aa
= mkAst(a
->op
, a
->sym
);
329 // Now make the merge node.
330 m
= mkastmerge(aa
, b
);
332 // Now move the merge node into a's location.
339 if(FLAGS_parsemergedebug
)
340 fprint(2, "%#lA\n----\n", av
->ast
);
344 if(FLAGS_parsemergedebug
){
346 fprint(2,"---- A wins (prefer) ----\n%R\n%R\n", a
->rule
, b
->rule
);
348 fprint(2,"---- A wins (assoc) ----\n[%d] %#A\n[%d] %#A\n", ta
, a
, tb
, b
);
350 fprint(2,"---- A wins ----\n%R\n%R\n", a
->rule
, b
->rule
);
354 if(FLAGS_parsemergedebug
){
356 fprint(2,"---- B wins (prefer) ----\n%R\n%R\n", a
->rule
, b
->rule
);
358 fprint(2,"---- B wins (assoc) ----\n[%d] %#A\n[%d] %#A\n", ta
, a
, tb
, b
);
360 fprint(2,"---- B wins ----\n%R\n%R\n", a
->rule
, b
->rule
);
362 bv
->ast
= nil
; // now b is ours
370 parsetext(Type
*tg
, Name text
)
382 p
= rgnewparse(gsym
->gram
);
383 p
->file
= N("<string>");
389 // p->touch = pvtouch;
391 parseactiontype
= typegramsym(tg
, nil
, nil
);
392 pv
= rgparse(gsym
->gram
, gsym
, p
, lextext
, &t
);
393 parseactiontype
= nil
;
395 // The lexer might have seen some # lines and
396 // updated p->file and p->line.
397 fprint(2, "%s:%d: error parsing, near %.20s\n",
398 p
->file
? p
->file
: N("???"), p
->line
, t
);
402 fprint(2, "%s:%d: error parsing, didn't reach eof, near %.20s\n",
403 p
->file
? p
->file
: N("???"), p
->line
, t
);
406 ast
= astleak(pv
->ast
);