2 /* Include this before everything else, for various large-file definitions */
8 * We want a reentrant scanner.
13 * We don't read interactively from the terminal.
15 %option never-interactive
18 * We want to stop processing when we get to the end of the input.
23 * The type for the state we keep for the scanner (and parser).
25 %option extra-type="protobuf_lang_state_t *"
28 * Prefix scanner routines with "protobuf_lang_" rather than "yy", so this scanner
29 * can coexist with other scanners.
31 %option prefix="protobuf_lang_"
34 * We have to override the memory allocators so that we don't get
35 * "unused argument" warnings from the yyscanner argument (which
36 * we don't use, as we have a global memory allocator).
38 * We provide, as macros, our own versions of the routines generated by Flex,
39 * which just call malloc()/realloc()/free() (as the Flex versions do),
40 * discarding the extra argument.
50 /* protobuf_lang_scanner.l
52 * C Protocol Buffers Language Lexer (for *.proto files)
53 * Copyright 2020, Huang Qiangxiong <qiangxiong.huang@qq.com>
55 * SPDX-License-Identifier: GPL-2.0-or-later
61 #include "protobuf_lang_tree.h"
62 #include "protobuf_lang_parser.h"
65 * Disable diagnostics in the code generated by Flex.
70 * Sleazy hack to suppress compiler warnings in yy_fatal_error().
72 #define YY_EXIT_FAILURE ((void)yyscanner, 2)
75 * Macros for the allocators, to discard the extra argument.
/*
 * Allocator hooks used in place of the routines Flex would generate.
 * Each one forwards to malloc()/realloc()/free() and drops the yyscanner
 * argument, which is never evaluated.
 *
 * Every pointer/size argument and every value-producing expansion is
 * parenthesized, so an argument such as "base + n" or "a ? b : c" is cast
 * and passed as a single unit rather than being split by the cast.
 */
#define protobuf_lang_alloc(size, yyscanner) ((void *)malloc(size))
#define protobuf_lang_realloc(ptr, size, yyscanner) ((void *)realloc((char *)(ptr), (size)))
#define protobuf_lang_free(ptr, yyscanner) free((char *)(ptr))
83 /* Extended error handling function defined in protobuf_lang_grammar.lemon */
84 void pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);
86 /* Duplicate the text and store the copy in the parser state so that it is freed automatically later. */
88 strdup_and_store(void* yyscanner, const char* text);
90 #define PROTOBUF_LANG_PARSE(token_type) \
91 protobuf_lang_get_extra(yyscanner)->tmp_token = g_new0(protobuf_lang_token_t, 1); \
92 pbl_store_struct_token(protobuf_lang_get_extra(yyscanner), protobuf_lang_get_extra(yyscanner)->tmp_token); \
93 protobuf_lang_get_extra(yyscanner)->tmp_token->v = strdup_and_store(yyscanner, yytext); \
94 protobuf_lang_get_extra(yyscanner)->tmp_token->ln = protobuf_lang_get_lineno(yyscanner); \
102 /* operations or symbols (PT_ means PBL Token) */
103 "(" PROTOBUF_LANG_PARSE(PT_LPAREN);
104 ")" PROTOBUF_LANG_PARSE(PT_RPAREN);
105 "[" PROTOBUF_LANG_PARSE(PT_LBRACKET);
106 "]" PROTOBUF_LANG_PARSE(PT_RBRACKET);
107 "{" PROTOBUF_LANG_PARSE(PT_LCURLY);
108 "}" PROTOBUF_LANG_PARSE(PT_RCURLY);
109 "==" PROTOBUF_LANG_PARSE(PT_EQUAL);
110 "!=" PROTOBUF_LANG_PARSE(PT_NOTEQUAL);
111 "<>" PROTOBUF_LANG_PARSE(PT_NOTEQUAL2);
112 ">=" PROTOBUF_LANG_PARSE(PT_GEQUAL);
113 "<=" PROTOBUF_LANG_PARSE(PT_LEQUAL);
114 "+=" PROTOBUF_LANG_PARSE(PT_ASSIGN_PLUS);
115 "=" PROTOBUF_LANG_PARSE(PT_ASSIGN);
116 "+" PROTOBUF_LANG_PARSE(PT_PLUS);
117 "-" PROTOBUF_LANG_PARSE(PT_MINUS);
118 "*" PROTOBUF_LANG_PARSE(PT_MULTIPLY);
119 "/" PROTOBUF_LANG_PARSE(PT_DIV);
120 "||" PROTOBUF_LANG_PARSE(PT_LOGIC_OR);
121 "|" PROTOBUF_LANG_PARSE(PT_OR);
122 "&&" PROTOBUF_LANG_PARSE(PT_LOGIC_AND);
123 "&" PROTOBUF_LANG_PARSE(PT_AND);
124 "!" PROTOBUF_LANG_PARSE(PT_NOT);
125 "~" PROTOBUF_LANG_PARSE(PT_NEG);
126 "^" PROTOBUF_LANG_PARSE(PT_XOR);
127 "<<" PROTOBUF_LANG_PARSE(PT_SHL);
128 ">>" PROTOBUF_LANG_PARSE(PT_SHR);
129 "%" PROTOBUF_LANG_PARSE(PT_PERCENT);
130 "$" PROTOBUF_LANG_PARSE(PT_DOLLAR);
131 "?" PROTOBUF_LANG_PARSE(PT_COND);
132 ";" PROTOBUF_LANG_PARSE(PT_SEMICOLON);
133 "." PROTOBUF_LANG_PARSE(PT_DOT);
134 "," PROTOBUF_LANG_PARSE(PT_COMMA);
135 ":" PROTOBUF_LANG_PARSE(PT_COLON);
136 "<" PROTOBUF_LANG_PARSE(PT_LESS);
137 ">" PROTOBUF_LANG_PARSE(PT_GREATER);
140 syntax PROTOBUF_LANG_PARSE(PT_SYNTAX);
141 import PROTOBUF_LANG_PARSE(PT_IMPORT);
142 weak PROTOBUF_LANG_PARSE(PT_WEAK);
143 public PROTOBUF_LANG_PARSE(PT_PUBLIC);
144 package PROTOBUF_LANG_PARSE(PT_PACKAGE);
145 option PROTOBUF_LANG_PARSE(PT_OPTION);
146 required PROTOBUF_LANG_PARSE(PT_REQUIRED);
147 optional PROTOBUF_LANG_PARSE(PT_OPTIONAL);
148 repeated PROTOBUF_LANG_PARSE(PT_REPEATED);
149 oneof PROTOBUF_LANG_PARSE(PT_ONEOF);
150 map PROTOBUF_LANG_PARSE(PT_MAP);
151 reserved PROTOBUF_LANG_PARSE(PT_RESERVED);
152 enum PROTOBUF_LANG_PARSE(PT_ENUM);
153 group PROTOBUF_LANG_PARSE(PT_GROUP);
154 extend PROTOBUF_LANG_PARSE(PT_EXTEND);
155 extensions PROTOBUF_LANG_PARSE(PT_EXTENSIONS);
156 message PROTOBUF_LANG_PARSE(PT_MESSAGE);
157 service PROTOBUF_LANG_PARSE(PT_SERVICE);
158 rpc PROTOBUF_LANG_PARSE(PT_RPC);
159 stream PROTOBUF_LANG_PARSE(PT_STREAM);
160 returns PROTOBUF_LANG_PARSE(PT_RETURNS);
161 to PROTOBUF_LANG_PARSE(PT_TO);
164 0|[1-9][0-9]* PROTOBUF_LANG_PARSE(PT_DECIMALLIT);
165 0[0-7]* PROTOBUF_LANG_PARSE(PT_OCTALLIT);
166 0[xX][0-9a-fA-F]+ PROTOBUF_LANG_PARSE(PT_HEXLIT);
168 /* Using extended identifier because we care only about position */
169 [a-zA-Z0-9_.][a-zA-Z0-9_.+-]* PROTOBUF_LANG_PARSE(PT_IDENT);
170 \"(\'|\\\"|[^\""\n"])*\" PROTOBUF_LANG_PARSE(PT_STRLIT);
171 \'(\"|\\\'|[^\'"\n"])*\' PROTOBUF_LANG_PARSE(PT_STRLIT);
175 "/*" { old_status = YY_START; BEGIN COMMENT; }
176 <COMMENT>"*/" { BEGIN old_status; }
177 <COMMENT>([^*]|\n)+|.
181 /* prevent flex jam */
182 . { pbl_parser_error(protobuf_lang_get_extra(yyscanner), "unexpected token in proto file!\n"); }
187 strdup_and_store(void* yyscanner, const char* text) {
188 return pbl_store_string_token(protobuf_lang_get_extra(yyscanner), g_strdup(text));
192 * Turn diagnostics back on, so we check the code that we've written.