regen pidl all: rm epan/dissectors/pidl/*-stamp; pushd epan/dissectors/pidl/ && make...
[wireshark-sm.git] / epan / dtd_parse.l
blob8b8b64f1625e6e6de901f6ae24515ac138a796cc
1 %top {
2 /* Include this before everything else, for various large-file definitions */
3 #include "config.h"
4 #include <wireshark.h>
7 /*
8  * We want a reentrant scanner.
9  */
10 %option reentrant
13  * We don't use input, so don't generate code for it.
14  */
15 %option noinput
18  * We don't use unput, so don't generate code for it.
19  */
20 %option nounput
23  * We don't read interactively from the terminal.
24  */
25 %option never-interactive
28  * We want to stop processing when we get to the end of the input.
29  */
30 %option noyywrap
33  * The type for the state we keep for a scanner.
34  */
35 %option extra-type="Dtd_Parse_scanner_state_t *"
38  * Prefix scanner routines with "Dtd_Parse_" rather than "yy", so this scanner
39  * can coexist with other scanners.
40  */
41 %option prefix="Dtd_Parse_"
44  * We have to override the memory allocators so that we don't get
45  * "unused argument" warnings from the yyscanner argument (which
46  * we don't use, as we have a global memory allocator).
47  *
48  * We provide, as macros, our own versions of the routines generated by Flex,
49  * which just call malloc()/realloc()/free() (as the Flex versions do),
50  * discarding the extra argument.
51  */
52 %option noyyalloc
53 %option noyyrealloc
54 %option noyyfree
58         /* dtd_parse.l
59         * an XML dissector for Wireshark
60         * lexical analyzer for DTDs
61         *
62         * Copyright 2004, Luis E. Garcia Ontanon <luis@ontanon.org>
63         *
64         * Wireshark - Network traffic analyzer
65         * By Gerald Combs <gerald@wireshark.org>
66         * Copyright 1998 Gerald Combs
67         *
68         * This program is free software; you can redistribute it and/or
69         * modify it under the terms of the GNU General Public License
70         * as published by the Free Software Foundation; either version 2
71         * of the License, or (at your option) any later version.
72         *
73         * This program is distributed in the hope that it will be useful,
74         * but WITHOUT ANY WARRANTY; without even the implied warranty of
75         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
76         * GNU General Public License for more details.
77         *
78         * You should have received a copy of the GNU General Public License
79         * along with this program; if not, write to the Free Software
80         * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
81         */
83 #include <glib.h>
84 #include <string.h>
86 #include "dtd.h"
87 #include "dtd_grammar.h"
88 #include "dtd_parse.h"
91  * Disable diagnostics in the code generated by Flex.
92  */
93 DIAG_OFF_FLEX()
95         struct _proto_xmlpi_attr {
96                 const char* name;
97                 void (*act)(char*);
98         };
100         typedef struct {
101                 GString* input_string;
102                 size_t offsetx;
103                 size_t len;
104                 void* pParser;
105                 char* location;
106                 char* attr_name;
107         } Dtd_Parse_scanner_state_t;
109         static size_t my_yyinput(Dtd_Parse_scanner_state_t *state,char* buff,size_t size);
111         static dtd_token_data_t* new_token(char*,char*);
113         static dtd_build_data_t* build_data;
115         static void set_proto_name (char* val) { g_free(build_data->proto_name); build_data->proto_name = g_strdup(val); }
116         static void set_media_type (char* val) { g_free(build_data->media_type); build_data->media_type = g_strdup(val); }
117         static void set_proto_root (char* val) { g_free(build_data->proto_root); build_data->proto_root = g_strdup(val); }
118         static void set_description (char* val) { g_free(build_data->description); build_data->description = g_strdup(val); }
119         static void set_recursive (char* val) { build_data->recursion = ( g_ascii_strcasecmp(val,"yes") == 0 ) ? true : false; }
#ifdef DEBUG_DTD_PARSER
/*
 * Trace a token on stderr as "->location (type)text".
 *
 * The token type is taken as an argument and the location is read from the
 * scanner state: the bare identifiers "token_type" and "location" are not
 * visible where this macro expands.  A location that has not been set yet
 * is printed as "(none)" rather than passing NULL to %s.
 */
#define DEBUG_DTD_TOKEN(token_type) \
        fprintf(stderr, "->%s (%i)%s\n", \
                yyextra->location ? yyextra->location : "(none)", \
                (token_type), yytext)
#else
#define DEBUG_DTD_TOKEN(token_type) ((void)0)
#endif

/*
 * Push the current lexeme into the parser as a token of the given type,
 * then stop scanning if build_data->error is no longer empty.
 *
 * Wrapped in do { } while (0) so that "DTD_PARSE(x);" is a single statement.
 */
#define DTD_PARSE(token_type) \
        do { \
                DEBUG_DTD_TOKEN(token_type); \
                DtdParse(yyextra->pParser, (token_type), new_token(yytext, yyextra->location), build_data); \
                if (build_data->error->len > 0) yyterminate(); \
        } while (0)
/*
 * Read the scanner's input from the in-memory state through my_yyinput(),
 * and start scanning in the DTD start condition.
 */
#define YY_INPUT(buff,result,max_size) ( (result) = my_yyinput(yyextra,(buff),(max_size)) )
#define YY_USER_INIT BEGIN DTD;

/*
 * Flex (v 2.5.35) uses this symbol to "exclude" unistd.h
 */
#ifdef _WIN32
#define YY_NO_UNISTD_H
#endif

/*
 * Sleazy hack to suppress compiler warnings in yy_fatal_error().
 */
#define YY_EXIT_FAILURE ((void)yyscanner, 2)

/*
 * Macros for the allocators, to discard the extra argument.
 * Every macro argument is parenthesized so that an expression argument
 * cannot be re-associated by the cast or the call.
 */
#define Dtd_Parse_alloc(size, yyscanner)        (void *)malloc((size))
#define Dtd_Parse_realloc(ptr, size, yyscanner) (void *)realloc((char *)(ptr), (size))
#define Dtd_Parse_free(ptr, yyscanner)          free((char *)(ptr))
/* XML comment delimiters. */
comment_start "<!--"
comment_stop "-->"

/* Processing instructions, and the two PI targets the scanner acts on. */
start_xmlpi "<?"

location_xmlpi "wireshark:location"
protocol_xmlpi "wireshark:protocol"

/*
 * '=' followed by optional blanks and the opening double quote of an
 * attribute value.  The blank class has to be written inside a bracket
 * expression: a bare [:blank:] is the set of the characters ':', 'b', 'l',
 * 'a', 'n' and 'k', so blanks after '=' would not be accepted.
 */
get_attr_quote =[[:blank:]]*["]
avoid_editor_bug ["]

get_location_xmlpi  [^[:blank:]]+

stop_xmlpi "?>"

/* "<!" and NOTATION, optionally separated by blanks (same bracket rule as above). */
notation_tag       "<!"[[:blank:]]*NOTATION

special_start  "<!"
special_stop   ">"
whitespace     [[:blank:]\r\n]+
newline        \n
/* Declaration keywords. */
attlist_kw      ATTLIST
doctype_kw      DOCTYPE
element_kw      ELEMENT

/* Keywords reported to the parser as element data. */
pcdata          #PCDATA
any             ANY
cdata           #CDATA

/* Keywords reported to the parser as attribute types. */
iD              ID
idref           IDREF
idrefs          IDREFS
nmtoken         NMTOKEN
nmtokens        NMTOKENS
entity          ENTITY
entities        ENTITIES
notation        NOTATION
cdata_t         CDATA

/* Empty content keyword and attribute default keywords. */
empty           EMPTY
defaulT         #DEFAULT
fixed           #FIXED
required        #REQUIRED
implied         #IMPLIED

/* Punctuation. */
star            "*"
question        "?"
plus            "+"
open_parens     "("
close_parens    ")"
open_bracket    "["
close_bracket   "]"
comma           ","
pipe            "|"
dquote          ["]

/* Names and quoted strings. */
name            [A-Za-z0-9][-a-zA-Z0-9_]*
dquoted         ["][^\"]*["]
squoted         ['][^\']*[']

/* Start conditions used by the rules. */
%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE IN_COMMENT IN_NOTATION
{whitespace}                    { /* no start condition on this rule: blanks, CR and LF are discarded */ }

<DTD>{comment_start}            { BEGIN IN_COMMENT; }
<IN_COMMENT>[^-]?               |
<IN_COMMENT>[-]                 { /* the text of a comment is discarded */ }
<IN_COMMENT>{comment_stop}      { BEGIN DTD; }
<DTD>{notation_tag}             { BEGIN IN_NOTATION; }
<IN_NOTATION>[^>]               { /* a NOTATION declaration is discarded up to its closing '>' */ }
<IN_NOTATION>{special_stop}     { BEGIN DTD; }
<DTD>{start_xmlpi}              { BEGIN XMLPI; }

<XMLPI>{location_xmlpi}         { BEGIN LOCATION; }

<XMLPI>{protocol_xmlpi}         { BEGIN PROTOCOL; }

<XMLPI>.                        {
        /*
         * Discard anything else inside a processing instruction.  The
         * pattern is a plain '.': written as "<.>" it only matches the
         * three-character sequence '<', any character, '>', which leaves
         * ordinary text such as "xml version=..." to Flex's default rule,
         * and that rule echoes it to the output.
         * The longer "?>" and "wireshark:..." matches still take precedence.
         */
}
<XMLPI>{stop_xmlpi}             { BEGIN DTD; }
<LOCATION>{get_location_xmlpi}  {
        /* Keep a private copy of the new location and drop the previous one. */
        char *new_location = g_strdup(yytext);

        g_free(yyextra->location);
        yyextra->location = new_location;

        BEGIN DONE;
}

<DONE>{stop_xmlpi}              { BEGIN DTD; }
<PROTOCOL>{name}                {
        /*
         * Start of a name="value" pair: keep a lower-cased copy of the name
         * (length -1 = the whole NUL-terminated string) until its value has
         * been read.
         */
        yyextra->attr_name = g_ascii_strdown(yytext, -1);

        BEGIN GET_ATTR_QUOTE;
}
<GET_ATTR_QUOTE>{get_attr_quote} { BEGIN GET_ATTR_VAL; }

<GET_ATTR_QUOTE>.               {
        /* Anything other than ="  after an attribute name is an error. */
        g_string_append_printf(build_data->error,
                                        "error in wireshark:protocol xmpli at %s : could not find attribute value!",
                                        yyextra->location);

        /*
         * Scanning stops here, so the attribute name allocated by the
         * <PROTOCOL>{name} rule would never be released; free it now and
         * clear the pointer so it cannot be used or freed again.
         */
        g_free(yyextra->attr_name);
        yyextra->attr_name = NULL;

        yyterminate();
}
<GET_ATTR_VAL>[^"]+ {
        /*"*/
        /*
         * Value of a wireshark:protocol attribute: look the attribute name
         * up in the table below and pass the value to its handler.  An
         * unknown name is reported in build_data->error and ends the scan.
         */
        static const struct _proto_xmlpi_attr proto_attrs[] =
        {
                { "proto_name", set_proto_name },
                { "media", set_media_type },
                { "root", set_proto_root },
                { "description", set_description },
                { "hierarchy", set_recursive },
                {NULL,NULL}
        };
        const struct _proto_xmlpi_attr* pa;

        /* Stops either on the matching entry or on the NULL terminator. */
        for (pa = proto_attrs; pa->name; pa++) {
                if (g_ascii_strcasecmp(yyextra->attr_name, pa->name) == 0) {
                        break;
                }
        }

        if (pa->name == NULL) {
                g_string_append_printf(build_data->error,
                                                "error in wireshark:protocol xmpli at %s : no such parameter %s!",
                                                yyextra->location, yyextra->attr_name);
                /* The name is released on every path; never leave it dangling. */
                g_free(yyextra->attr_name);
                yyextra->attr_name = NULL;
                yyterminate();
        }

        pa->act(yytext);

        g_free(yyextra->attr_name);
        yyextra->attr_name = NULL;

        BEGIN GET_ATTR_CLOSE_QUOTE;
}
<GET_ATTR_CLOSE_QUOTE>{dquote}  {
        /* Closing quote of a value: go back to reading attribute names. */
        BEGIN PROTOCOL;
}

<PROTOCOL>{stop_xmlpi}          { BEGIN DTD; }
<DTD>{special_start}    { /* tag delimiters */ DTD_PARSE(TOKEN_TAG_START); }
<DTD>{special_stop}     { DTD_PARSE(TOKEN_TAG_STOP); }

<DTD>{attlist_kw}       { /* declaration keywords */ DTD_PARSE(TOKEN_ATTLIST_KW); }
<DTD>{element_kw}       { DTD_PARSE(TOKEN_ELEMENT_KW); }
<DTD>{doctype_kw}       { DTD_PARSE(TOKEN_DOCTYPE_KW); }

<DTD>{pcdata}           { /* element content keywords */ DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{any}              { DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{cdata}            { DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{empty}            { DTD_PARSE(TOKEN_EMPTY_KW); }

<DTD>{iD}               { /* attribute types */ DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{idref}            { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{idrefs}           { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{nmtoken}          { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{nmtokens}         { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{entity}           { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{entities}         { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{notation}         { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{cdata_t}          { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{defaulT}          { /* attribute defaults */ DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
<DTD>{fixed}            { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
<DTD>{required}         { DTD_PARSE(TOKEN_ATT_DEF); }
<DTD>{implied}          { DTD_PARSE(TOKEN_ATT_DEF); }

<DTD>{star}             { /* punctuation */ DTD_PARSE(TOKEN_STAR); }
<DTD>{question}         { DTD_PARSE(TOKEN_QUESTION); }
<DTD>{plus}             { DTD_PARSE(TOKEN_PLUS); }
<DTD>{comma}            { DTD_PARSE(TOKEN_COMMA); }
<DTD>{open_parens}      { DTD_PARSE(TOKEN_OPEN_PARENS); }
<DTD>{close_parens}     { DTD_PARSE(TOKEN_CLOSE_PARENS); }
<DTD>{open_bracket}     { DTD_PARSE(TOKEN_OPEN_BRACKET); }
<DTD>{close_bracket}    { DTD_PARSE(TOKEN_CLOSE_BRACKET); }
<DTD>{pipe}             { DTD_PARSE(TOKEN_PIPE); }

<DTD>{dquoted}          |
<DTD>{squoted}          { /* quoted strings, either quote style */ DTD_PARSE(TOKEN_QUOTED); }
<DTD>{name}             { /* listed after the keywords, which win a tie on length */ DTD_PARSE(TOKEN_NAME); }
352  * Turn diagnostics back on, so we check the code that we've written.
353  */
354 DIAG_ON_FLEX()
356 static dtd_token_data_t* new_token(char* text, char* location) {
357         dtd_token_data_t* t = g_new(dtd_token_data_t,1);
359         t->text = g_strdup(text);
360         t->location = g_strdup(location);
362         return t;
366 static size_t my_yyinput(Dtd_Parse_scanner_state_t *state, char* buff, size_t size) {
368         if (state->offsetx >= state->len) {
369                 return YY_NULL;
370         } else if (state->offsetx + size <= state->len) {
371                 memcpy(buff, state->input_string->str + state->offsetx, size);
372                 state->offsetx += size;
373                 return size;
374         } else {
375                 size = state->len - state->offsetx;
376                 memcpy(buff, state->input_string->str + state->offsetx, size);
377                 state->offsetx = state->len;
378                 return size;
379         }
382 extern dtd_build_data_t* dtd_parse(GString* s) {
383         yyscan_t scanner;
384         Dtd_Parse_scanner_state_t state;
386         if (Dtd_Parse_lex_init(&scanner) != 0) {
387 #ifdef DEBUG_DTD_PARSER
388                 fprintf(stderr, "Can't initialize scanner: %s\n",
389                     strerror(errno));
390 #endif
391                 return NULL;
392         }
394         state.input_string = s;
395         state.offsetx = 0;
396         state.len = state.input_string->len;
398         state.pParser = DtdParseAlloc(g_malloc);
400 #ifdef DEBUG_DTD_PARSER
401         DtdParseTrace(stderr, ">>");
402 #endif
404         build_data = g_new(dtd_build_data_t,1);
406         build_data->proto_name = NULL;
407         build_data->media_type = NULL;
408         build_data->description = NULL;
409         build_data->proto_root = NULL;
410         build_data->recursion = false;
412         build_data->elements = g_ptr_array_new();
413         build_data->attributes = g_ptr_array_new();
415         build_data->error = g_string_new("");
417         state.location = NULL;
418         state.attr_name = NULL;
420         /* Associate the state with the scanner */
421         Dtd_Parse_set_extra(&state, scanner);
423         Dtd_Parse_lex(scanner);
425         DtdParse(state.pParser, 0, NULL, build_data);
427         Dtd_Parse_lex_destroy(scanner);
429         g_free(state.location);
431         DtdParseFree(state.pParser, g_free);
433         return build_data;