Update ooo320-m1
[ooovba.git] / writerfilter / source / rtftok / RTFScanner.lex
blob32fee7a14092fb4700bd4a9d10e8335d9276d30f
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  * 
5  * Copyright 2008 by Sun Microsystems, Inc.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * $RCSfile: RTFScanner.lex,v $
10  *
11  * $Revision: 1.5 $
12  *
13  * This file is part of OpenOffice.org.
14  *
15  * OpenOffice.org is free software: you can redistribute it and/or modify
16  * it under the terms of the GNU Lesser General Public License version 3
17  * only, as published by the Free Software Foundation.
18  *
19  * OpenOffice.org is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22  * GNU Lesser General Public License version 3 for more details
23  * (a copy is included in the LICENSE file that accompanied this code).
24  *
25  * You should have received a copy of the GNU Lesser General Public License
26  * version 3 along with OpenOffice.org.  If not, see
27  * <http://www.openoffice.org/license.html>
28  * for a copy of the LGPLv3 License.
29  *
30  ************************************************************************/
32 /* compile with flex++ -8 -f -+ -Sflex.skl -ortfparser.cxx rtfparser.lex */
33 %option yylineno
35   //#include <io.h>
36 #include <math.h>
37 #include <string.h>
38 #include <osl/file.h>
39 #include <assert.h>
40 #include <vector>
42 #if defined (UNX)
43   #define stricmp strcasecmp
44 #endif
46 writerfilter::rtftok::RTFScanner* writerfilter::rtftok::RTFScanner::createRTFScanner(class writerfilter::rtftok::RTFInputSource& inputSource, writerfilter::rtftok::RTFScannerHandler &eventHandler)
48   return new yyFlexLexer(&inputSource, eventHandler);
53 extern "C" {
54 //int isatty(int fd) { return 0; }
/* End-of-input hook: always returns 1. */
int yywrap(void)
{
  return 1;
}
59 oslFileHandle yy_osl_in=NULL;
60 #define YY_INPUT(buf,result,max_size) \
63         assert(yy_osl_in!=NULL);\
64         sal_Bool isEOF;\
65         oslFileError ret=osl_isEndOfFile( yy_osl_in, &isEOF );\
66         assert(ret==osl_File_E_None);\
67         if (isEOF)\
68         {\
69                 result=YY_NULL;\
70         }\
71         else\
72         {\
73         sal_uInt64 bytesRead;\
74         ret=osl_readFile( yy_osl_in, buf, max_size, &bytesRead);\
75         assert(ret==osl_File_E_None);\
76         result = bytesRead; \
77         }\
82 //extern RtfTokenizer* this;
83 void yyFlexLexer::split_ctrl(char *_yytext, char* token, char *value)
84    {
85      int i=0; // skip first '\'
86      while(_yytext[i]!=0 && (_yytext[i]=='\r' || _yytext[i]=='\n')) i++;
87      while(_yytext[i]!=0 && (_yytext[i]<'A' || (_yytext[i]>'Z' && _yytext[i]<'a') || _yytext[i]>'z')) i++; 
88      while(_yytext[i]!=0 && _yytext[i]>='A') *(token++)=_yytext[i++];
89      *token=0;
90      while(_yytext[i]!=0 && _yytext[i]>' ') *(value++)=_yytext[i++];
91      *value=0;
92    }
94  void yyFlexLexer::raise_ctrl(char* _yytext)
95    {
96      char token[50];
97      char value[50];
98      split_ctrl(_yytext, token, value);
99      eventHandler.ctrl(token, value);
100    }
102  void yyFlexLexer::raise_dest(char* _yytext)
103    {
104      char token[50];
105      char value[50];
106      split_ctrl(_yytext, token, value);
107      eventHandler.dest(token, value);
108    }
110 #define _num_of_destctrls (sizeof(_destctrls)/sizeof(_destctrls[0]))
111 static const char* _destctrls[] = {
112 "aftncn",
113 "aftnsep",
114 "aftnsepc",
115 "annotation",
116 "atnauthor",
117 "atndate",
118 "atnicn",
119 "atnid",
120 "atnparent",
121 "atnref",
122 "atntime",
123 "atrfend",
124 "atrfstart",
125 "author",
126 "background",
127 "bkmkend",
128 "bkmkstart",
129 "buptim",
130 "category",
131 "colortbl",
132 "comment",
133 "company",
134 "creatim",
135 "datafield",
136 "do",
137 "doccomm",
138 "docvar",
139 "dptxbxtext",
140 "falt",
141 "fchars",
142 "ffdeftext",
143 "ffentrymcr",
144 "ffexitmcr",
145 "ffformat",
146 "ffhelptext",
147 "ffl",
148 "ffname",
149 "ffstattext",
150 "field",
151 "file",
152 "filetbl",
153 "fldinst",
154 "fldrslt",
155 "fldtype",
156 "fname",
157 "fontemb",
158 "fontfile",
159 "fonttbl",
160 "footer",
161 "footer",
162 "footerf",
163 "footerl",
164 "footnote",
165 "formfield",
166 "ftncn",
167 "ftnsep",
168 "ftnsepc",
169 "g",
170 "generator",
171 "gridtbl",
172 "header",
173 "header",
174 "headerf",
175 "headerl",
176 "htmltag",
177 "info",
178 "keycode",
179 "keywords",
180 "lchars",
181 "levelnumbers",
182 "leveltext",
183 "lfolevel",
184 "list",
185 "listlevel",
186 "listname",
187 "listoverride",
188 "listoverridetable",
189 "listtable",
190 "listtext",
191 "manager",
192 "mhtmltag",
193 "nesttableprops",
194 "nextfile",
195 "nonesttables",
196 "nonshppict",
197 "objalias",
198 "objclass",
199 "objdata",
200 "object",
201 "objname",
202 "objsect",
203 "objtime",
204 "oldcprops",
205 "oldpprops",
206 "oldsprops",
207 "oldtprops",
208 "operator",
209 "panose",
210 "pgp",
211 "pgptbl",
212 "picprop",
213 "pict",
214 "pn",
215 "pnseclvl",
216 "pntext",
217 "pntxta",
218 "pntxtb",
219 "printim",
220 "private",
221 "pwd",
222 "pxe",
223 "result",
224 "revtbl",
225 "revtim",
226 "rsidtbl",
227 "rtf",
228 "rxe",
229 "shp",
230 "shpgrp",
231 "shpinst",
232 "shppict",
233 "shprslt",
234 "shptxt",
235 "sn",
236 "sp",
237 "stylesheet",
238 "subject",
239 "sv",
240 "tc",
241 "template",
242 "title",
243 "txe",
244 "ud",
245 "upr",
246 "urtf",
247 "userprops",
248 "xe"
251  void yyFlexLexer::raise_destOrCtrl(char* _yytext)
252    {
253      char token[50];
254      char value[50];
255      split_ctrl(_yytext, token, value);
256      char* result=(char*)bsearch(token, _destctrls, _num_of_destctrls, 20, (int (*)(const void*, const void*))stricmp);
257      if (result)
258        {
259          eventHandler.dest(token, value);
260        }
261      else 
262        {
263          eventHandler.lbrace();
264          eventHandler.ctrl(token, value);
265        }
266    }
271 \{\\upr\{" "? { /* skip upr destination */
272   int c;
273   int br=1;
274   while (br>0 && (c = yyinput()) != EOF)
275     {
276       if (c=='}') br--;
277       if (c=='{') br++;
278     }
279   eventHandler.lbrace();
280   num_chars+=yyleng;
284 \\bin(("+"|"-")?[0-9]*)?" "? {
285   raise_dest(yytext);
286   num_chars+=yyleng;
287   int len=atoi(yytext+4);
288    num_chars+=len;
289    //   pictureBytes=2*len;
290   while ( len )
291     {
292       int c = yyinput();
293       eventHandler.addBinData((unsigned char)c);
294       len--;      
295     }
296   eventHandler.rbrace();
299 \{[\r\n]*\\\*\\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? { /* stared dest word */
300         raise_dest(yytext);
301         num_chars+=yyleng;
303 \{[\r\n]*\\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? { /* dest word */
304         raise_destOrCtrl(yytext);
306 \\[A-Za-z]+(("+"|"-")?[0-9]*)?" "? { /* ctrl word */
307         raise_ctrl(yytext);
308         star_flag=0;
309         num_chars+=yyleng;
311 \\\'[A-Fa-f0-9][A-Fa-f0-9] { /* hex char */
312   eventHandler.addHexChar(yytext);
313   num_chars+=yyleng;
315 \\* { /* star */
316         star_flag=1;
317         num_chars+=yyleng;
319 \{ { /* lbrace */
320         eventHandler.lbrace();
321         num_chars+=yyleng;
323 \} { /* rbrace */
324         eventHandler.rbrace();
325         num_chars+=yyleng;
\\\| { /* consumed silently; only the character counter advances */ num_chars += yyleng; }
\\~ { /* reported to the handler as U+00A0 */ num_chars += yyleng; eventHandler.addCharU(0xa0); }
\\- { /* consumed silently */ num_chars += yyleng; }
\\_ { /* consumed silently */ num_chars += yyleng; }
\\\: { /* consumed silently */ num_chars += yyleng; }
\n { /* line feed: advance the line counter and the character counter */ ++num_lines; num_chars += yyleng; }
\r { /* carriage return: counted, otherwise dropped */ num_chars += yyleng; }
\t { /* tab: counted, otherwise dropped */ num_chars += yyleng; }
" "(" "+) { /* run of two or more blanks, reported with its length */ eventHandler.addSpaces(yyleng); num_chars += yyleng; }
. { /* any character not matched above is passed through as-is */ eventHandler.addChar(yytext[0]); num_chars += yyleng; }