Tweak the header slightly
[philodendron.git] / runtime / interpreter.c
blob3c6fbdfdcdf7d0184c6ec078c84001c6786884e5
1 /*********************************************************************
3 Gazelle: a system for building fast, reusable parsers
5 interpreter.c
7 Once a compiled grammar has been loaded into memory, the routines
8 in this file are what actually does the parsing. This file is an
9 "interpreter" in the sense that it parses the input by using the
10 grammar as a data structure -- no grammar-specific code is ever
11 generated or executed. Despite this, it is still quite fast, and
12 has a very low memory footprint.
14 Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
16 *********************************************************************/
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include "interpreter.h"
23 #define RESIZE_ARRAY_IF_NECESSARY(ptr, size, desired_size) \
24 if(size < desired_size) \
25 { \
26 size *= 2; \
27 ptr = realloc(ptr, size*sizeof(*ptr)); \
30 struct parse_stack_frame *init_new_stack_frame(struct parse_state *parse_state, struct rtn *rtn, int begin)
32 struct parse_stack_frame *frame = &parse_state->parse_stack[parse_state->parse_stack_length++];
33 frame->rtn = rtn;
34 frame->rtn_state = &rtn->states[0];
35 frame->slots.rtn = rtn;
36 frame->rtn_transition = NULL;
37 frame->slots.num_slots = rtn->num_slots;
38 frame->start_offset = begin;
40 RESIZE_ARRAY_IF_NECESSARY(parse_state->slotbuf, parse_state->slotbuf_size,
41 parse_state->slotbuf_len + frame->slots.num_slots);
43 frame->slots.slots = &parse_state->slotbuf[parse_state->slotbuf_len];
44 for(int i = 0; i < frame->slots.num_slots; i++)
45 frame->slots.slots[i].type = PARSE_VAL_EMPTY;
47 parse_state->slotbuf_len += frame->slots.num_slots;
49 return frame;
52 struct parse_stack_frame *pop_stack_frame(struct parse_state *parse_state)
54 struct parse_stack_frame *frame = &parse_state->parse_stack[parse_state->parse_stack_length-1];
55 parse_state->slotbuf_len -= frame->slots.num_slots;
56 parse_state->parse_stack_length--;
57 if(parse_state->parse_stack_length == 0)
59 return NULL; /* the parse is over */
61 else
63 frame--;
64 frame->rtn_state = frame->rtn_transition->dest_state;
65 return frame;
69 void reset_dfa_match(struct parse_state *parse_state)
71 struct parse_stack_frame *frame = &parse_state->parse_stack[parse_state->parse_stack_length-1];
72 parse_state->dfa = frame->rtn_state->term_dfa;
73 parse_state->dfa_state = &parse_state->dfa->states[0];
74 parse_state->match_begin = parse_state->offset;
75 parse_state->last_match_state = NULL;
78 void do_rtn_transition(struct parse_state *parse_state, int match_begin, int match_len, char *terminal)
80 struct parse_stack_frame *frame = &parse_state->parse_stack[parse_state->parse_stack_length-1];
81 struct parse_val val = {PARSE_VAL_TERMINAL, {.terminal = {match_begin, match_len}}};
82 bool found_transition = false;
84 /* is this an ignored terminal? if so, just skip it */
85 for(int i = 0; i < frame->rtn->num_ignore; i++)
87 if(frame->rtn->ignore_terminals[i] == terminal)
89 reset_dfa_match(parse_state);
90 return;
94 /* find a transition out of this RTN state on this terminal */
95 for(int i = 0; i < frame->rtn_state->num_transitions; i++)
97 struct rtn_transition *t = &frame->rtn_state->transitions[i];
98 if(t->transition_type == TERMINAL_TRANSITION && t->edge.terminal_name == terminal)
100 frame->slots.slots[t->slotnum] = val;
101 frame->rtn_state = t->dest_state;
102 found_transition = true;
103 break;
105 else if(t->transition_type == DECISION && t->edge.decision->terminal_name == terminal)
107 struct decision *d = t->edge.decision;
109 for(int i = 0; i < d->num_actions; i++)
111 struct rtn_transition *t2 = &frame->rtn_state->transitions[d->actions[i]];
113 if(t2->transition_type == NONTERM_TRANSITION)
115 RESIZE_ARRAY_IF_NECESSARY(parse_state->parse_stack,
116 parse_state->parse_stack_size,
117 parse_state->parse_stack_length+1);
118 frame->rtn_transition = t2;
119 frame = init_new_stack_frame(parse_state, t2->edge.nonterminal, match_begin);
121 else if(t2->transition_type == TERMINAL_TRANSITION)
123 frame->slots.slots[t2->slotnum] = val;
124 frame->rtn_state = t2->dest_state;
127 found_transition = true;
128 break;
132 if(found_transition)
134 while(parse_state->parse_stack_length > 0 && frame->rtn_state->is_final)
136 for(int i = 0; i < parse_state->num_completion_callbacks; i++)
138 struct completion_callback *cb = &parse_state->callbacks[i];
139 if(frame->rtn->name == cb->rtn_name)
140 cb->callback(parse_state, parse_state->user_data);
143 frame = pop_stack_frame(parse_state);
146 if(parse_state->parse_stack_length > 0)
147 reset_dfa_match(parse_state);
149 else
151 printf("Syntax error -- no RTN transition!!\n");
155 void refill_buffer(struct parse_state *state)
157 struct buffer *b = state->buffer;
158 /* if more than 1/4 of the buffer is precious (can't be discarded), double the
159 * buffer size */
160 int precious_len = state->offset - state->precious_offset;
161 if(precious_len > (b->size / 4))
163 b->size *= 2;
164 b->buf = realloc(b->buf, b->size);
167 memmove(b->buf, b->buf + state->precious_offset - b->base_offset, precious_len);
168 b->len = precious_len;
169 b->base_offset = state->precious_offset;
171 /* now read from the file as much as we can */
172 int bytes_read = fread(b->buf + b->len, sizeof(char), b->size - b->len, b->file);
174 if(bytes_read == 0)
176 if(feof(b->file))
178 b->is_eof = true;
180 else
182 printf("Error reading from file!\n");
183 exit(1);
187 b->len += bytes_read;
190 void parse(struct parse_state *parse_state, bool *eof)
192 bool user_cancelled = false;
194 while(!parse_state->buffer->is_eof && !user_cancelled && parse_state->parse_stack_length > 0)
196 again:
197 if(parse_state->offset == parse_state->buffer->base_offset + parse_state->buffer->len)
198 refill_buffer(parse_state);
200 int ch = parse_state->buffer->buf[parse_state->offset - parse_state->buffer->base_offset];
202 /* We've read one character, which should cause one transition in the DFA for terminals.
203 * Find the appropriate transition, and put the DFA in its new state. */
204 for(int i = 0; i < parse_state->dfa_state->num_transitions; i++)
206 struct intfa_transition *t = &parse_state->dfa_state->transitions[i];
208 if(ch >= t->ch_low && ch <= t->ch_high)
210 parse_state->dfa_state = t->dest_state;
211 if(parse_state->dfa_state->final)
213 parse_state->last_match_state = parse_state->dfa_state;
214 parse_state->last_match_end = parse_state->offset;
216 parse_state->offset++;
217 goto again;
221 /* since we fell out of the loop, there was no match.
222 * if there was a previous match, fall back to that. otherwise this character represents
223 * a syntax error. */
224 if(parse_state->last_match_state)
226 /* we have a terminal. do RTN transitions as appropriate */
227 parse_state->offset = parse_state->last_match_end + 1;
228 do_rtn_transition(parse_state, parse_state->match_begin, parse_state->last_match_end,
229 parse_state->last_match_state->final);
231 else
233 printf("Syntax error!\n");
237 if(parse_state->buffer->is_eof) *eof = true;
240 void alloc_parse_state(struct parse_state *state)
242 state->buffer = malloc(sizeof(struct buffer));
243 state->buffer->size = 4096;
244 state->buffer->buf = malloc((state->buffer->size) * sizeof(char));
246 state->parse_stack_size = 50;
247 state->parse_stack = malloc(sizeof(struct parse_stack_frame) * state->parse_stack_size);
249 state->slotbuf_size = 200;
250 state->slotbuf = malloc(sizeof(*state->slotbuf) * state->slotbuf_size);
252 state->callbacks = malloc(sizeof(*state->callbacks) * 10); /* XXX */
255 static void init_parse_state_common(struct parse_state *state);
257 void reinit_parse_state(struct parse_state *state)
259 state->buffer->base_offset -= state->offset;
260 init_parse_state_common(state);
263 void init_parse_state(struct parse_state *state, struct grammar *g, FILE *file)
265 state->grammar = g;
267 state->buffer->file = file;
268 state->buffer->len = 0;
269 state->buffer->base_offset = 0;
270 state->buffer->is_eof = false;
271 state->num_completion_callbacks = 0;
273 init_parse_state_common(state);
276 static void init_parse_state_common(struct parse_state *state)
278 state->offset = 0;
279 state->precious_offset = 0;
280 state->parse_stack_length = 1;
281 state->match_begin = 0;
282 state->last_match_end = 0;
283 state->last_match_state = NULL;
284 state->parse_stack_length = 0;
285 state->slotbuf_len = 0;
287 init_new_stack_frame(state, &state->grammar->rtns[0], 0);
288 reset_dfa_match(state);
291 void free_parse_state(struct parse_state *state)
293 free(state->buffer->buf);
294 free(state->buffer);
295 free(state->parse_stack);
296 free(state->slotbuf);
299 void register_callback(struct parse_state *state, char *rtn_name, parse_callback_t callback, void *user_data)
301 struct completion_callback *cb = &state->callbacks[state->num_completion_callbacks++];
302 cb->callback = callback;
303 state->user_data = user_data;
304 for(char **strs = state->grammar->strings; *strs; strs++)
305 if(strcmp(*strs, rtn_name) == 0)
307 cb->rtn_name = *strs;
312 * Local Variables:
313 * c-file-style: "bsd"
314 * c-basic-offset: 4
315 * indent-tabs-mode: nil
316 * End:
317 * vim:et:sts=4:sw=4