1 /*********************************************************************
3 Gazelle: a system for building fast, reusable parsers
7 Once a compiled grammar has been loaded into memory, the routines
8 in this file are what actually does the parsing. This file is an
9 "interpreter" in the sense that it parses the input by using the
10 grammar as a data structure -- no grammar-specific code is ever
11 generated or executed. Despite this, it is still quite fast, and
12 has a very low memory footprint.
14 Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
16 *********************************************************************/
21 #include "interpreter.h"
23 #define RESIZE_ARRAY_IF_NECESSARY(ptr, size, desired_size) \
24 if(size < desired_size) \
27 ptr = realloc(ptr, size*sizeof(*ptr)); \
30 struct parse_stack_frame
*init_new_stack_frame(struct parse_state
*parse_state
, struct rtn
*rtn
, int begin
)
32 struct parse_stack_frame
*frame
= &parse_state
->parse_stack
[parse_state
->parse_stack_length
++];
34 frame
->rtn_state
= &rtn
->states
[0];
35 frame
->slots
.rtn
= rtn
;
36 frame
->rtn_transition
= NULL
;
37 frame
->slots
.num_slots
= rtn
->num_slots
;
38 frame
->start_offset
= begin
;
40 RESIZE_ARRAY_IF_NECESSARY(parse_state
->slotbuf
, parse_state
->slotbuf_size
,
41 parse_state
->slotbuf_len
+ frame
->slots
.num_slots
);
43 frame
->slots
.slots
= &parse_state
->slotbuf
[parse_state
->slotbuf_len
];
44 for(int i
= 0; i
< frame
->slots
.num_slots
; i
++)
45 frame
->slots
.slots
[i
].type
= PARSE_VAL_EMPTY
;
47 parse_state
->slotbuf_len
+= frame
->slots
.num_slots
;
52 struct parse_stack_frame
*pop_stack_frame(struct parse_state
*parse_state
)
54 struct parse_stack_frame
*frame
= &parse_state
->parse_stack
[parse_state
->parse_stack_length
-1];
55 parse_state
->slotbuf_len
-= frame
->slots
.num_slots
;
56 parse_state
->parse_stack_length
--;
57 if(parse_state
->parse_stack_length
== 0)
59 return NULL
; /* the parse is over */
64 frame
->rtn_state
= frame
->rtn_transition
->dest_state
;
69 void reset_dfa_match(struct parse_state
*parse_state
)
71 struct parse_stack_frame
*frame
= &parse_state
->parse_stack
[parse_state
->parse_stack_length
-1];
72 parse_state
->dfa
= frame
->rtn_state
->term_dfa
;
73 parse_state
->dfa_state
= &parse_state
->dfa
->states
[0];
74 parse_state
->match_begin
= parse_state
->offset
;
75 parse_state
->last_match_state
= NULL
;
78 void do_rtn_transition(struct parse_state
*parse_state
, int match_begin
, int match_len
, char *terminal
)
80 struct parse_stack_frame
*frame
= &parse_state
->parse_stack
[parse_state
->parse_stack_length
-1];
81 struct parse_val val
= {PARSE_VAL_TERMINAL
, {.terminal
= {match_begin
, match_len
}}};
82 bool found_transition
= false;
84 /* is this an ignored terminal? if so, just skip it */
85 for(int i
= 0; i
< frame
->rtn
->num_ignore
; i
++)
87 if(frame
->rtn
->ignore_terminals
[i
] == terminal
)
89 reset_dfa_match(parse_state
);
94 /* find a transition out of this RTN state on this terminal */
95 for(int i
= 0; i
< frame
->rtn_state
->num_transitions
; i
++)
97 struct rtn_transition
*t
= &frame
->rtn_state
->transitions
[i
];
98 if(t
->transition_type
== TERMINAL_TRANSITION
&& t
->edge
.terminal_name
== terminal
)
100 frame
->slots
.slots
[t
->slotnum
] = val
;
101 frame
->rtn_state
= t
->dest_state
;
102 found_transition
= true;
105 else if(t
->transition_type
== DECISION
&& t
->edge
.decision
->terminal_name
== terminal
)
107 struct decision
*d
= t
->edge
.decision
;
109 for(int i
= 0; i
< d
->num_actions
; i
++)
111 struct rtn_transition
*t2
= &frame
->rtn_state
->transitions
[d
->actions
[i
]];
113 if(t2
->transition_type
== NONTERM_TRANSITION
)
115 RESIZE_ARRAY_IF_NECESSARY(parse_state
->parse_stack
,
116 parse_state
->parse_stack_size
,
117 parse_state
->parse_stack_length
+1);
118 frame
->rtn_transition
= t2
;
119 frame
= init_new_stack_frame(parse_state
, t2
->edge
.nonterminal
, match_begin
);
121 else if(t2
->transition_type
== TERMINAL_TRANSITION
)
123 frame
->slots
.slots
[t2
->slotnum
] = val
;
124 frame
->rtn_state
= t2
->dest_state
;
127 found_transition
= true;
134 while(parse_state
->parse_stack_length
> 0 && frame
->rtn_state
->is_final
)
136 for(int i
= 0; i
< parse_state
->num_completion_callbacks
; i
++)
138 struct completion_callback
*cb
= &parse_state
->callbacks
[i
];
139 if(frame
->rtn
->name
== cb
->rtn_name
)
140 cb
->callback(parse_state
, parse_state
->user_data
);
143 frame
= pop_stack_frame(parse_state
);
146 if(parse_state
->parse_stack_length
> 0)
147 reset_dfa_match(parse_state
);
151 printf("Syntax error -- no RTN transition!!\n");
155 void refill_buffer(struct parse_state
*state
)
157 struct buffer
*b
= state
->buffer
;
158 /* if more than 1/4 of the buffer is precious (can't be discarded), double the
160 int precious_len
= state
->offset
- state
->precious_offset
;
161 if(precious_len
> (b
->size
/ 4))
164 b
->buf
= realloc(b
->buf
, b
->size
);
167 memmove(b
->buf
, b
->buf
+ state
->precious_offset
- b
->base_offset
, precious_len
);
168 b
->len
= precious_len
;
169 b
->base_offset
= state
->precious_offset
;
171 /* now read from the file as much as we can */
172 int bytes_read
= fread(b
->buf
+ b
->len
, sizeof(char), b
->size
- b
->len
, b
->file
);
182 printf("Error reading from file!\n");
187 b
->len
+= bytes_read
;
190 void parse(struct parse_state
*parse_state
, bool *eof
)
192 bool user_cancelled
= false;
194 while(!parse_state
->buffer
->is_eof
&& !user_cancelled
&& parse_state
->parse_stack_length
> 0)
197 if(parse_state
->offset
== parse_state
->buffer
->base_offset
+ parse_state
->buffer
->len
)
198 refill_buffer(parse_state
);
200 int ch
= parse_state
->buffer
->buf
[parse_state
->offset
- parse_state
->buffer
->base_offset
];
202 /* We've read one character, which should cause one transition in the DFA for terminals.
203 * Find the appropriate transition, and put the DFA in its new state. */
204 for(int i
= 0; i
< parse_state
->dfa_state
->num_transitions
; i
++)
206 struct intfa_transition
*t
= &parse_state
->dfa_state
->transitions
[i
];
208 if(ch
>= t
->ch_low
&& ch
<= t
->ch_high
)
210 parse_state
->dfa_state
= t
->dest_state
;
211 if(parse_state
->dfa_state
->final
)
213 parse_state
->last_match_state
= parse_state
->dfa_state
;
214 parse_state
->last_match_end
= parse_state
->offset
;
216 parse_state
->offset
++;
221 /* since we fell out of the loop, there was no match.
222 * if there was a previous match, fall back to that. otherwise this character represents
224 if(parse_state
->last_match_state
)
226 /* we have a terminal. do RTN transitions as appropriate */
227 parse_state
->offset
= parse_state
->last_match_end
+ 1;
228 do_rtn_transition(parse_state
, parse_state
->match_begin
, parse_state
->last_match_end
,
229 parse_state
->last_match_state
->final
);
233 printf("Syntax error!\n");
237 if(parse_state
->buffer
->is_eof
) *eof
= true;
240 void alloc_parse_state(struct parse_state
*state
)
242 state
->buffer
= malloc(sizeof(struct buffer
));
243 state
->buffer
->size
= 4096;
244 state
->buffer
->buf
= malloc((state
->buffer
->size
) * sizeof(char));
246 state
->parse_stack_size
= 50;
247 state
->parse_stack
= malloc(sizeof(struct parse_stack_frame
) * state
->parse_stack_size
);
249 state
->slotbuf_size
= 200;
250 state
->slotbuf
= malloc(sizeof(*state
->slotbuf
) * state
->slotbuf_size
);
252 state
->callbacks
= malloc(sizeof(*state
->callbacks
) * 10); /* XXX */
255 static void init_parse_state_common(struct parse_state
*state
);
257 void reinit_parse_state(struct parse_state
*state
)
259 state
->buffer
->base_offset
-= state
->offset
;
260 init_parse_state_common(state
);
263 void init_parse_state(struct parse_state
*state
, struct grammar
*g
, FILE *file
)
267 state
->buffer
->file
= file
;
268 state
->buffer
->len
= 0;
269 state
->buffer
->base_offset
= 0;
270 state
->buffer
->is_eof
= false;
271 state
->num_completion_callbacks
= 0;
273 init_parse_state_common(state
);
276 static void init_parse_state_common(struct parse_state
*state
)
279 state
->precious_offset
= 0;
280 state
->parse_stack_length
= 1;
281 state
->match_begin
= 0;
282 state
->last_match_end
= 0;
283 state
->last_match_state
= NULL
;
284 state
->parse_stack_length
= 0;
285 state
->slotbuf_len
= 0;
287 init_new_stack_frame(state
, &state
->grammar
->rtns
[0], 0);
288 reset_dfa_match(state
);
291 void free_parse_state(struct parse_state
*state
)
293 free(state
->buffer
->buf
);
295 free(state
->parse_stack
);
296 free(state
->slotbuf
);
299 void register_callback(struct parse_state
*state
, char *rtn_name
, parse_callback_t callback
, void *user_data
)
301 struct completion_callback
*cb
= &state
->callbacks
[state
->num_completion_callbacks
++];
302 cb
->callback
= callback
;
303 state
->user_data
= user_data
;
304 for(char **strs
= state
->grammar
->strings
; *strs
; strs
++)
305 if(strcmp(*strs
, rtn_name
) == 0)
307 cb
->rtn_name
= *strs
;
313 * c-file-style: "bsd"
315 * indent-tabs-mode: nil