1 /***********************************************************************/
5 /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
7 /* Copyright 1996 Institut National de Recherche en Informatique et */
8 /* en Automatique. All rights reserved. This file is distributed */
9 /* under the terms of the GNU Library General Public License, with */
10 /* the special exception on linking described in file ../LICENSE. */
12 /***********************************************************************/
16 /* The table-driven automaton for lexers generated by camllex. */
30 value lex_last_action
;
31 value lex_eof_reached
;
44 value lex_backtrk_code
;
45 value lex_default_code
;
51 #if defined(ARCH_BIG_ENDIAN) || SIZEOF_SHORT != 2
52 #define Short(tbl,n) \
53 (*((unsigned char *)((tbl) + (n) * 2)) + \
54 (*((schar *)((tbl) + (n) * 2 + 1)) << 8))
56 #define Short(tbl,n) (((short *)(tbl))[(n)])
59 CAMLprim value
caml_lex_engine(struct lexing_table
*tbl
, value start_state
,
60 struct lexer_buffer
*lexbuf
)
62 int state
, base
, backtrk
, c
;
64 state
= Int_val(start_state
);
67 lexbuf
->lex_last_pos
= lexbuf
->lex_start_pos
= lexbuf
->lex_curr_pos
;
68 lexbuf
->lex_last_action
= Val_int(-1);
70 /* Reentry after refill */
74 /* Lookup base address or action number for current state */
75 base
= Short(tbl
->lex_base
, state
);
76 if (base
< 0) return Val_int(-base
-1);
77 /* See if it's a backtrack point */
78 backtrk
= Short(tbl
->lex_backtrk
, state
);
80 lexbuf
->lex_last_pos
= lexbuf
->lex_curr_pos
;
81 lexbuf
->lex_last_action
= Val_int(backtrk
);
83 /* See if we need a refill */
84 if (lexbuf
->lex_curr_pos
>= lexbuf
->lex_buffer_len
){
85 if (lexbuf
->lex_eof_reached
== Val_bool (0)){
86 return Val_int(-state
- 1);
91 /* Read next input char */
92 c
= Byte_u(lexbuf
->lex_buffer
, Long_val(lexbuf
->lex_curr_pos
));
93 lexbuf
->lex_curr_pos
+= 2;
95 /* Determine next state */
96 if (Short(tbl
->lex_check
, base
+ c
) == state
)
97 state
= Short(tbl
->lex_trans
, base
+ c
);
99 state
= Short(tbl
->lex_default
, state
);
100 /* If no transition on this char, return to last backtrack point */
102 lexbuf
->lex_curr_pos
= lexbuf
->lex_last_pos
;
103 if (lexbuf
->lex_last_action
== Val_int(-1)) {
104 caml_failwith("lexing: empty token");
106 return lexbuf
->lex_last_action
;
109 /* Erase the EOF condition only if the EOF pseudo-character was
110 consumed by the automaton (i.e. there was no backtrack above)
112 if (c
== 256) lexbuf
->lex_eof_reached
= Val_bool (0);
117 /***********************************************/
118 /* New lexer engine, with memory of positions */
119 /***********************************************/
121 static void run_mem(char *pc
, value mem
, value curr_pos
) {
123 unsigned char dst
, src
;
130 /* fprintf(stderr,"[%hhu] <- %d\n",dst,Int_val(curr_pos)) ;*/
131 Field(mem
,dst
) = curr_pos
;
133 /* fprintf(stderr,"[%hhu] <- [%hhu]\n",dst,src) ; */
134 Field(mem
,dst
) = Field(mem
,src
) ;
139 static void run_tag(char *pc
, value mem
) {
141 unsigned char dst
, src
;
148 /* fprintf(stderr,"[%hhu] <- -1\n",dst) ; */
149 Field(mem
,dst
) = Val_int(-1) ;
151 /* fprintf(stderr,"[%hhu] <- [%hhu]\n",dst,src) ; */
152 Field(mem
,dst
) = Field(mem
,src
) ;
157 CAMLprim value
caml_new_lex_engine(struct lexing_table
*tbl
, value start_state
,
158 struct lexer_buffer
*lexbuf
)
160 int state
, base
, backtrk
, c
, pstate
;
161 state
= Int_val(start_state
);
164 lexbuf
->lex_last_pos
= lexbuf
->lex_start_pos
= lexbuf
->lex_curr_pos
;
165 lexbuf
->lex_last_action
= Val_int(-1);
167 /* Reentry after refill */
171 /* Lookup base address or action number for current state */
172 base
= Short(tbl
->lex_base
, state
);
174 int pc_off
= Short(tbl
->lex_base_code
, state
) ;
175 run_tag(Bp_val(tbl
->lex_code
) + pc_off
, lexbuf
->lex_mem
);
176 /* fprintf(stderr,"Perform: %d\n",-base-1) ; */
177 return Val_int(-base
-1);
179 /* See if it's a backtrack point */
180 backtrk
= Short(tbl
->lex_backtrk
, state
);
182 int pc_off
= Short(tbl
->lex_backtrk_code
, state
);
183 run_tag(Bp_val(tbl
->lex_code
) + pc_off
, lexbuf
->lex_mem
);
184 lexbuf
->lex_last_pos
= lexbuf
->lex_curr_pos
;
185 lexbuf
->lex_last_action
= Val_int(backtrk
);
188 /* See if we need a refill */
189 if (lexbuf
->lex_curr_pos
>= lexbuf
->lex_buffer_len
){
190 if (lexbuf
->lex_eof_reached
== Val_bool (0)){
191 return Val_int(-state
- 1);
196 /* Read next input char */
197 c
= Byte_u(lexbuf
->lex_buffer
, Long_val(lexbuf
->lex_curr_pos
));
198 lexbuf
->lex_curr_pos
+= 2;
200 /* Determine next state */
202 if (Short(tbl
->lex_check
, base
+ c
) == state
)
203 state
= Short(tbl
->lex_trans
, base
+ c
);
205 state
= Short(tbl
->lex_default
, state
);
206 /* If no transition on this char, return to last backtrack point */
208 lexbuf
->lex_curr_pos
= lexbuf
->lex_last_pos
;
209 if (lexbuf
->lex_last_action
== Val_int(-1)) {
210 caml_failwith("lexing: empty token");
212 return lexbuf
->lex_last_action
;
215 /* If some transition, get and perform memory moves */
216 int base_code
= Short(tbl
->lex_base_code
, pstate
) ;
218 if (Short(tbl
->lex_check_code
, base_code
+ c
) == pstate
)
219 pc_off
= Short(tbl
->lex_trans_code
, base_code
+ c
) ;
221 pc_off
= Short(tbl
->lex_default_code
, pstate
) ;
223 run_mem(Bp_val(tbl
->lex_code
) + pc_off
, lexbuf
->lex_mem
, lexbuf
->lex_curr_pos
) ;
224 /* Erase the EOF condition only if the EOF pseudo-character was
225 consumed by the automaton (i.e. there was no backtrack above)
227 if (c
== 256) lexbuf
->lex_eof_reached
= Val_bool (0);