fixed pointer arithmetic bug
[swftools.git] / src / parser.lex
blob2d1d3169c415200e468a0eea7f91336e523b65e7
1 %{
3 #include <string.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include "../lib/q.h"
7 #include "parser.h"
8 #include "../lib/utf8.h"
10 //RVALUE         {NUMBER}|{PERCENT}|{NAME}|\"{STRING}\"|{DIM}
11 //<a>.                {printf("<a>%s\n", yytext);}
12 // %x: exclusive, %s: inclusive
13 char*type_names[] = {"twip","number","command","string","assignment","identifier","label","end"};
14 static int line=1;
15 static int column=1;
17 mem_t strings;
18 mem_t tokens;
20 static void count(char*text, int len, int condition)
22     int t;
23     for(t=0;t<len;t++) {
24         if(text[t]=='\n') {
25             line++;
26             column=1;
27         } else {
28             column++;
29         }
30     }
/* Text that store() puts in front of the next TWIP, NUMBER or IDENTIFIER
   token. The += and -= assignment rules set it; store() clears it after
   every token. */
static char *prefix = NULL;
35 static void unescapeString(string_t * tmp)
37     char *p, *p1;
38     /* fixme - this routine expects the string to be
39        null-terminated */
41     for (p1=(char*)tmp->str; (p=strchr(p1, '\\')); p1 = p+1) 
42     {
43         int nr=2;
44         int new=1;
45         switch(p[1])
46         {
47             case '\\': p[0] = '\\'; break;
48             case '"': p[0] = '"'; break;
49             case 'b': p[0] = '\b'; break;
50             case 'f': p[0] = '\f'; break;
51             case 'n': p[0] = '\n'; break;
52             case 'r': p[0] = '\r'; break;
53             case 't': p[0] = '\t'; break;
54             case 'x':  case 'u': {
55                 int max=4;
56                 int num=0;
57                 char*utf8;
58                 char bracket = 0;
59                 if(p[1] == 'u')
60                     max = 6;
61                 if(p[2] == '{')  {
62                     bracket = 1;nr++;max++;
63                 }
64                 while(strchr("0123456789abcdefABCDEF", p[nr]) && (bracket || nr < max)) {
65                     num <<= 4;
66                     if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0';
67                     if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10;
68                     if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10;
69                     nr++;
70                 }
71                 if(bracket && p[nr]=='}') {
72                     bracket = 0;
73                     nr++;
74                 }
75                 utf8 = getUTF8(num);
76                 new = strlen(utf8);
77                 memcpy(p, utf8, new); // do not copy the terminating zero
78                 break;
79             }
80             default:
81                 continue;
82         }
83         tmp->len -= (nr-new); 
84         {
85             int t;
86             char*to=p+new,*from=p+nr;
87             while(*from) {
88                 *to = *from;
89                 to++;
90                 from++;
91             }
92         }
93     }
96 static void store(enum type_t type, int line, int column, char*text, int length)
98     struct token_t token;
99     string_t tmp;
100     token.type = type;
101     token.line = line;
102     token.column = column;
103     //printf("->%d(%s) %s\n", type, type_names[type], text);fflush(stdout);
105     token.text_pos = 0;
106     token.text = 0;
107     switch(type) {
108         case END:
109             string_set2(&tmp, "", 0);
110             token.text_pos = mem_putstring(&strings, tmp);
111         break;
112         case STRING:
113             string_set2(&tmp, text+1, length-2);
114             unescapeString(&tmp);
115             token.text_pos = mem_putstring(&strings, tmp);
116         break;
117         case TWIP: 
118         case NUMBER: 
119         case IDENTIFIER:
120             string_set2(&tmp, text, length);
121             if(prefix) {
122                 //strcat
123                 token.text_pos = mem_put(&strings, prefix, strlen(prefix));
124                 mem_putstring(&strings, tmp);
125             } else {
126                 token.text_pos = mem_putstring(&strings, tmp);
127             }
128             prefix = 0;
129         break;
130         case RAWDATA:
131             string_set2(&tmp, text+1/*:*/, length-5/*.end*/);
132             token.text_pos = mem_putstring(&strings, tmp);
133         break;
134         case COMMAND:
135             string_set2(&tmp, text+1, length-1);
136             token.text_pos = mem_putstring(&strings, tmp);
137         break;
138         case ASSIGNMENT: {
139             char*x = &text[length-1];
140             if(x[-1] == '-' || x[-1] == '+')
141                 x--;
142             do{x--;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
143             x++; //first space
144             string_set2(&tmp, text, x-text);
145             token.text_pos = mem_putstring(&strings, tmp);
146             /*char*y,*x = strchr(text, '=');
147             if(!x) exit(1);
148             y=x;
149             do{y--;} while(*y==32 || *y==10 || *y==13 || *y=='\t');
150             do{x++;} while(*x==32 || *x==10 || *x==13 || *x=='\t');
151             token.text1 = (char*)put(&strings, text, y-text + 1, 1);
152             token.text2 = (char*)put(&strings, x, length-(x-text), 1);*/
153         } break;
154     }
156     mem_put(&tokens, &token, sizeof(struct token_t));
157     prefix = 0;
160 #define MAX_INCLUDE_DEPTH 16
161 static YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH];
162 static int line_stack[MAX_INCLUDE_DEPTH];
163 static int column_stack[MAX_INCLUDE_DEPTH];
164 static int include_stack_ptr = 0;
166 static void handleInclude(char*text, int len)
168     text+=9;len-=9;
169     while(len >=1 && (text[0] == ' ' || text[0] == '\t')) {
170         text++;len--;
171     }
172     while(len >= 1 && 
173           (text[len-1] == ' ' || 
174            text[len-1] == '\r' || 
175            text[len-1] == '\n')) {
176         len--;
177     }
178     if(len >= 2 && text[0] == '"' && text[len-1] == '"') {
179         text++; len-=2;
180     }
181     text[len] = 0;
182     if(include_stack_ptr >= MAX_INCLUDE_DEPTH) {
183         fprintf( stderr, "Includes nested too deeply" );
184         exit( 1 );
185     }
186     include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
187     line_stack[include_stack_ptr] = line;
188     column_stack[include_stack_ptr] = column;
189     include_stack_ptr++;
190     yyin = fopen(text, "rb");
191     if (!yyin) {
192         fprintf(stderr, "Couldn't open %s\n", text);
193         exit(1);
194     }
195     yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
197 #ifdef INITIAL
198     BEGIN(INITIAL);
199 #else
200     // best guess
201     BEGIN(0);
202 #endif
/* c(): advance the line/column counters over the current match. */
#define c() { count(yytext, yyleng, YY_START); }
/* s(type): record the current match as a token of the given type, located
   at the current line/column. */
#define s(type) { store((type), line, column, yytext, yyleng); }
/* Start conditions: R is inclusive (%s) and is entered by the assignment
   rules; BINARY is exclusive (%x) and is entered by the rule for an opening
   square bracket. */
209 %s R
210 %x BINARY
/* Named patterns:
     NAME     starts with a letter, underscore, dot or slash; continues with
              those or digits, each optionally preceded by dashes
     TWIP     optionally negative integer with at most two decimals, the
              second of which must be 0 or 5
     NUMBER   optionally negative integer with an optional fractional part
     PERCENT  a NUMBER followed by a percent sign
     STRING   body of a quoted string: backslash escapes and any character
              other than backslash, double quote and newline
     S        a single whitespace character
     RVALUE   a quoted string or a run of non-whitespace characters */
212 NAME     [a-zA-Z_./](-*[a-zA-Z0-9_./])*
213 TWIP     (-?[0-9]+(\.([0-9]([05])?)?)?)
214 NUMBER   -?[0-9]+(\.[0-9]*)?
215 PERCENT  {NUMBER}%
216 STRING   (\\.|[^\\"\n])*
217 S        [ \n\r\t]
218 RVALUE   \"{STRING}\"|([^ \n\r\t]+)
222 <BINARY>\] {c();BEGIN(0);} /* a closing bracket ends the skipped section */
223 <BINARY>.  {c();} /* skipped content is only counted, never stored */
224 <BINARY>\n {c();}
225 {TWIP}/[ \n\r\t]            {s(TWIP);c();BEGIN(0);} /* only matches when whitespace follows */
226 {NUMBER}/[ \n\r\t]          {s(NUMBER);c();BEGIN(0);} /* only matches when whitespace follows */
227 ^#[^\n]*\n                  {c();} /* comment line: a # at the start of a line */
228 [ \t\r]#[^\n]*\n            {c();} /* comment after a space, tab or carriage return */
229 \"{STRING}\"                {s(STRING);c();BEGIN(0);}
230 \"{STRING}$                 {c();printf("unterminated string in line %d: %s\n", line, yytext);exit(1);yyterminate();}
231 {NAME}{S}*\+=               {s(ASSIGNMENT);prefix="<plus>";c();BEGIN(R);}
232 {NAME}{S}*-=                {s(ASSIGNMENT);prefix="<minus>";c();BEGIN(R);}
233 {NAME}{S}*=                 {s(ASSIGNMENT);c();BEGIN(R);}
234 <R>{ /* values which appear only on the right-hand side of assignments, like: x=50% */
235     [^ :\n\t\r]*                    {s(IDENTIFIER);c();BEGIN(0);}
237 \.include{S}.*\n                    {handleInclude(yytext, yyleng);}
238 \.{NAME}                    {s(COMMAND);c();}
239 :([^.]|\.[^e]|\.e[^n]|\.en[^d]|\.end[^ \n\r\t]|[ \n\r\t])*\.end     {s(RAWDATA);c();}
240 {NAME}                      {s(IDENTIFIER);c();}
241 "["                         {c();BEGIN(BINARY);}
242 {S}                         {c();}
.                           {/* Catch-all: any character no other rule accepts is a syntax
                                error. Prints the position, the character and the rest of
                                its line, then terminates the program. */
                             int ch;
                             char first=yytext[0];
                             printf("Syntax error in line %d, %d: %s", line, column, yytext);
                             while(1) {
                                 /* The result of input() must be kept in an int: stored in a
                                    char, EOF either never compares equal (unsigned char) or
                                    is confused with the byte 0xFF (signed char). The 0 check
                                    is for flex versions that are reported to return 0 rather
                                    than EOF at the end of the input. */
                                 ch=input();
                                 if(ch=='\n' || ch==EOF || ch==0)
                                     break;
                                 printf("%c", ch);
                             }
                             if(first>='0' && first<='9')
                                 printf(" (identifiers must not start with a digit)");
                             printf("\n");
                             exit(1);
                             yyterminate();
                            }
<<EOF>>                     {c();
                             if ( include_stack_ptr <= 0 ) {
                                /* End of the top-level input: emit the END token and stop.
                                   The include depth is left at 0, not at -1, so that a
                                   later scan never indexes the include stack below its
                                   first entry. */
                                include_stack_ptr = 0;
                                s(END);
                                yyterminate();
                             } else {
                                 /* End of an included file: release its buffer, close the
                                    file that handleInclude opened for it, and resume the
                                    including file at the saved position. */
                                 FILE*included = yyin;
                                 include_stack_ptr--;
                                 yy_delete_buffer( YY_CURRENT_BUFFER );
                                 if(included)
                                     fclose(included);
                                 yy_switch_to_buffer(
                                      include_stack[include_stack_ptr] );
                                 column = column_stack[include_stack_ptr];
                                 line = line_stack[include_stack_ptr];
                             }
                            }
/*
 * Called by the scanner when it reaches the end of its input. Always
 * returns 1, i.e. there is never a further input to continue with.
 */
int yywrap()
{
    const int no_more_input = 1;
    return no_more_input;
}
277 void freeTokens(struct token_t*file)
279     mem_clear(&strings);
280     mem_clear(&tokens);
283 struct token_t* generateTokens(char*filename)
285     FILE*fi;
286     int t;
287     struct token_t*result;
288     int num;
290     if(!filename)
291         return 0;
293     if(!strcmp(filename,"-"))
294         fi = stdin;
295     else
296         fi = fopen(filename, "rb");
298     if(!fi) {
299         printf("Couldn't find file %s\n", filename);
300         return 0;
301     }
302     yyin = fi;
304     mem_init(&strings);
305     mem_init(&tokens);
306     mem_put(&strings, &t, 1); //hack- make all valid strings start at position >0
308     line=1;
309     column=1;
311     yylex();
312 #ifdef YY_CURRENT_BUFFER
313     // some newer flex versions require it like this:
314     yy_delete_buffer(YY_CURRENT_BUFFER);
315 #else
316     yy_delete_buffer(yy_current_buffer);
317 #endif
319     result = (struct token_t*)tokens.buffer;
320     num = tokens.pos/sizeof(struct token_t);
322     for(t=0;t<tokens.pos/sizeof(struct token_t);t++) {
323         if(result[t].text_pos) {
324             result[t].text = &strings.buffer[result[t].text_pos];
325         }
326     }
328     if(fi!=stdin)
329         fclose(fi);
330     return result;