Sys.Signals module for a Variant type of signals (and a set_signal function that...
[ocaml.git] / byterun / lexing.c
blobd2776116efe941cb0ec77ab4b4a06c37811cbc15
1 /***********************************************************************/
2 /* */
3 /* Objective Caml */
4 /* */
5 /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
6 /* */
7 /* Copyright 1996 Institut National de Recherche en Informatique et */
8 /* en Automatique. All rights reserved. This file is distributed */
9 /* under the terms of the GNU Library General Public License, with */
10 /* the special exception on linking described in file ../LICENSE. */
11 /* */
12 /***********************************************************************/
14 /* $Id$ */
16 /* The table-driven automaton for lexers generated by camllex. */
18 #include "fail.h"
19 #include "mlvalues.h"
20 #include "stacks.h"
22 struct lexer_buffer {
23 value refill_buff;
24 value lex_buffer;
25 value lex_buffer_len;
26 value lex_abs_pos;
27 value lex_start_pos;
28 value lex_curr_pos;
29 value lex_last_pos;
30 value lex_last_action;
31 value lex_eof_reached;
32 value lex_mem;
33 value lex_start_p;
34 value lex_curr_p;
37 struct lexing_table {
38 value lex_base;
39 value lex_backtrk;
40 value lex_default;
41 value lex_trans;
42 value lex_check;
43 value lex_base_code;
44 value lex_backtrk_code;
45 value lex_default_code;
46 value lex_trans_code;
47 value lex_check_code;
48 value lex_code;
51 #if defined(ARCH_BIG_ENDIAN) || SIZEOF_SHORT != 2
52 #define Short(tbl,n) \
53 (*((unsigned char *)((tbl) + (n) * 2)) + \
54 (*((schar *)((tbl) + (n) * 2 + 1)) << 8))
55 #else
56 #define Short(tbl,n) (((short *)(tbl))[(n)])
57 #endif
59 CAMLprim value caml_lex_engine(struct lexing_table *tbl, value start_state,
60 struct lexer_buffer *lexbuf)
62 int state, base, backtrk, c;
64 state = Int_val(start_state);
65 if (state >= 0) {
66 /* First entry */
67 lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;
68 lexbuf->lex_last_action = Val_int(-1);
69 } else {
70 /* Reentry after refill */
71 state = -state - 1;
73 while(1) {
74 /* Lookup base address or action number for current state */
75 base = Short(tbl->lex_base, state);
76 if (base < 0) return Val_int(-base-1);
77 /* See if it's a backtrack point */
78 backtrk = Short(tbl->lex_backtrk, state);
79 if (backtrk >= 0) {
80 lexbuf->lex_last_pos = lexbuf->lex_curr_pos;
81 lexbuf->lex_last_action = Val_int(backtrk);
83 /* See if we need a refill */
84 if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){
85 if (lexbuf->lex_eof_reached == Val_bool (0)){
86 return Val_int(-state - 1);
87 }else{
88 c = 256;
90 }else{
91 /* Read next input char */
92 c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
93 lexbuf->lex_curr_pos += 2;
95 /* Determine next state */
96 if (Short(tbl->lex_check, base + c) == state)
97 state = Short(tbl->lex_trans, base + c);
98 else
99 state = Short(tbl->lex_default, state);
100 /* If no transition on this char, return to last backtrack point */
101 if (state < 0) {
102 lexbuf->lex_curr_pos = lexbuf->lex_last_pos;
103 if (lexbuf->lex_last_action == Val_int(-1)) {
104 caml_failwith("lexing: empty token");
105 } else {
106 return lexbuf->lex_last_action;
108 }else{
109 /* Erase the EOF condition only if the EOF pseudo-character was
110 consumed by the automaton (i.e. there was no backtrack above)
112 if (c == 256) lexbuf->lex_eof_reached = Val_bool (0);
117 /***********************************************/
118 /* New lexer engine, with memory of positions */
119 /***********************************************/
121 static void run_mem(char *pc, value mem, value curr_pos) {
122 for (;;) {
123 unsigned char dst, src ;
125 dst = *pc++ ;
126 if (dst == 0xff)
127 return ;
128 src = *pc++ ;
129 if (src == 0xff) {
130 /* fprintf(stderr,"[%hhu] <- %d\n",dst,Int_val(curr_pos)) ;*/
131 Field(mem,dst) = curr_pos ;
132 } else {
133 /* fprintf(stderr,"[%hhu] <- [%hhu]\n",dst,src) ; */
134 Field(mem,dst) = Field(mem,src) ;
139 static void run_tag(char *pc, value mem) {
140 for (;;) {
141 unsigned char dst, src ;
143 dst = *pc++ ;
144 if (dst == 0xff)
145 return ;
146 src = *pc++ ;
147 if (src == 0xff) {
148 /* fprintf(stderr,"[%hhu] <- -1\n",dst) ; */
149 Field(mem,dst) = Val_int(-1) ;
150 } else {
151 /* fprintf(stderr,"[%hhu] <- [%hhu]\n",dst,src) ; */
152 Field(mem,dst) = Field(mem,src) ;
157 CAMLprim value caml_new_lex_engine(struct lexing_table *tbl, value start_state,
158 struct lexer_buffer *lexbuf)
160 int state, base, backtrk, c, pstate ;
161 state = Int_val(start_state);
162 if (state >= 0) {
163 /* First entry */
164 lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;
165 lexbuf->lex_last_action = Val_int(-1);
166 } else {
167 /* Reentry after refill */
168 state = -state - 1;
170 while(1) {
171 /* Lookup base address or action number for current state */
172 base = Short(tbl->lex_base, state);
173 if (base < 0) {
174 int pc_off = Short(tbl->lex_base_code, state) ;
175 run_tag(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem);
176 /* fprintf(stderr,"Perform: %d\n",-base-1) ; */
177 return Val_int(-base-1);
179 /* See if it's a backtrack point */
180 backtrk = Short(tbl->lex_backtrk, state);
181 if (backtrk >= 0) {
182 int pc_off = Short(tbl->lex_backtrk_code, state);
183 run_tag(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem);
184 lexbuf->lex_last_pos = lexbuf->lex_curr_pos;
185 lexbuf->lex_last_action = Val_int(backtrk);
188 /* See if we need a refill */
189 if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){
190 if (lexbuf->lex_eof_reached == Val_bool (0)){
191 return Val_int(-state - 1);
192 }else{
193 c = 256;
195 }else{
196 /* Read next input char */
197 c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
198 lexbuf->lex_curr_pos += 2;
200 /* Determine next state */
201 pstate=state ;
202 if (Short(tbl->lex_check, base + c) == state)
203 state = Short(tbl->lex_trans, base + c);
204 else
205 state = Short(tbl->lex_default, state);
206 /* If no transition on this char, return to last backtrack point */
207 if (state < 0) {
208 lexbuf->lex_curr_pos = lexbuf->lex_last_pos;
209 if (lexbuf->lex_last_action == Val_int(-1)) {
210 caml_failwith("lexing: empty token");
211 } else {
212 return lexbuf->lex_last_action;
214 }else{
215 /* If some transition, get and perform memory moves */
216 int base_code = Short(tbl->lex_base_code, pstate) ;
217 int pc_off ;
218 if (Short(tbl->lex_check_code, base_code + c) == pstate)
219 pc_off = Short(tbl->lex_trans_code, base_code + c) ;
220 else
221 pc_off = Short(tbl->lex_default_code, pstate) ;
222 if (pc_off > 0)
223 run_mem(Bp_val(tbl->lex_code) + pc_off, lexbuf->lex_mem, lexbuf->lex_curr_pos) ;
224 /* Erase the EOF condition only if the EOF pseudo-character was
225 consumed by the automaton (i.e. there was no backtrack above)
227 if (c == 256) lexbuf->lex_eof_reached = Val_bool (0);