2 /* protobuf_lang_parser.lemon
4 * C Protocol Buffers Language (PBL) Parser (for *.proto files)
5 * Copyright 2020, Huang Qiangxiong <qiangxiong.huang@qq.com>
7 * SPDX-License-Identifier: GPL-2.0-or-later
10 /* This parser is mainly to get MESSAGE, ENUM, and FIELD information from *.proto files.
11 * There are two formats of *.proto files:
12 * 1) Protocol Buffers Version 3 Language Specification:
13 * https://developers.google.com/protocol-buffers/docs/reference/proto3-spec
14 * 2) Protocol Buffers Version 2 Language Specification:
15 * https://developers.google.com/protocol-buffers/docs/reference/proto2-spec
16 * There are some errors in the 'proto', 'option' (value) and 'reserved' (fieldName) definitions on those sites.
17 * This parser is created because Wireshark is mainly implemented in plain ANSI C but the official
18 * Protocol Buffers Language parser is implemented in C++.
27 #include <ws_diag_control.h>
28 #include <wsutil/file_util.h>
29 #include "protobuf_lang_tree.h"
30 #include "protobuf_lang_parser.h"
31 #include "protobuf_lang_scanner_lex.h"
33 #define NAME_TO_BE_SET "<NAME_TO_BE_SET>"
34 #define NEED_NOT_NAME "<NEED_NOT_NAME>"
36 static void *ProtobufLangParserAlloc(void *(*mallocProc)(size_t));
37 static void ProtobufLangParser(void *yyp, int yymajor, protobuf_lang_token_t *yyminor, protobuf_lang_state_t *state);
38 static void ProtobufLangParserFree(void *p, void (*freeProc)(void*));
40 /* Error handling function for parser */
41 void protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg);
43 /* Extended error handling function */
44 void pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);
46 /* It's just the approximate line number which is gotten when a grammar rule is reduced
47 by the parser (lemon). That might be overridden by the lineno argument of
48 pbl_set_node_name() later. */
49 #define CUR_LINENO (protobuf_lang_get_lineno(state->scanner))
52 } /* end of %include */
/* Lemon configuration: the prefix of the generated parser functions, the extra
   argument made available to every action, and the type of token values. */
58 %name ProtobufLangParser
60 %extra_argument { protobuf_lang_state_t *state }
62 %token_type { protobuf_lang_token_t* }
/* NOTE(review): the next three lines reference $$, so they are presumably the body
   of a destructor directive whose opening line is not visible here. Nothing is
   released in it. */
65 /* We manage memory allocated for token values by ourselves */
66 (void) state; /* Mark unused, similar to Q_UNUSED */
67 (void) $$; /* Mark unused, similar to Q_UNUSED */
/* Error reporting: each path reports through pbl_parser_error() and then sets
   state->grammar_error, which the lexing loop in run_pbl_parser() tests before
   fetching the next token.
   NOTE(review): the directive headers and the condition choosing between the two
   "Syntax Error" messages are not visible here - presumably the token text is
   printed only when yyminor is available; confirm. */
72 pbl_parser_error(state, "Syntax Error: unexpected token \"%s\"", yyminor->v);
74 pbl_parser_error(state, "Syntax Error: missing token");
76 state->grammar_error = TRUE;
80 pbl_parser_error(state, "Parse Error");
81 state->grammar_error = TRUE;
84 /* Keywords like 'syntax', 'message', etc. can be used as the names of messages, fields or enums.
85 So we tell lemon: "If you are unable to parse this keyword, try treating it as an identifier instead." */
86 %fallback PT_IDENT PT_SYNTAX PT_IMPORT PT_WEAK PT_PUBLIC PT_PACKAGE PT_OPTION PT_REQUIRED PT_OPTIONAL.
87 %fallback PT_IDENT PT_REPEATED PT_ONEOF PT_MAP PT_RESERVED PT_ENUM PT_GROUP PT_EXTEND PT_EXTENSIONS.
88 %fallback PT_IDENT PT_MESSAGE PT_SERVICE PT_RPC PT_STREAM PT_RETURNS PT_TO.
/* Semantic value types of the non-terminals. */
/* Non-terminals whose value is a plain string. */
90 %type strLit { gchar* }
91 %type label { gchar* }
93 %type keyType { gchar* }
94 %type messageType { gchar* }
95 %type constant { gchar* }
/* Non-terminals whose value is a token structure; the actions below read its
   text (->v) and line number (->ln). */
97 %type exIdent { protobuf_lang_token_t* }
98 %type optionName { protobuf_lang_token_t* }
99 %type messageName { protobuf_lang_token_t* }
100 %type enumName { protobuf_lang_token_t* }
101 %type streamName { protobuf_lang_token_t* }
102 %type fieldName { protobuf_lang_token_t* }
103 %type oneofName { protobuf_lang_token_t* }
104 %type mapName { protobuf_lang_token_t* }
105 %type serviceName { protobuf_lang_token_t* }
106 %type rpcName { protobuf_lang_token_t* }
107 %type groupName { protobuf_lang_token_t* }
/* Non-terminals whose value is a tree node. */
109 %type protoBody { pbl_node_t* }
110 %type topLevelDef { pbl_node_t* }
111 %type message { pbl_node_t* }
112 %type messageBody { pbl_node_t* }
113 %type rpc { pbl_node_t* }
114 %type rpcDecl { pbl_node_t* }
115 %type field { pbl_node_t* }
116 %type oneofField { pbl_node_t* }
117 %type enum { pbl_node_t* }
118 %type enumBody { pbl_node_t* }
119 %type enumField { pbl_node_t* }
120 %type service { pbl_node_t* }
121 %type serviceBody { pbl_node_t* }
122 %type stream { pbl_node_t* }
123 %type streamDecl { pbl_node_t* }
124 %type fieldOptions { pbl_node_t* }
125 %type fieldOption { pbl_node_t* }
126 %type oneof { pbl_node_t* }
127 %type oneofBody { pbl_node_t* }
128 %type mapField { pbl_node_t* }
129 %type group { pbl_node_t* }
130 %type extend { pbl_node_t* }
131 %type extendBody { pbl_node_t* }
/* Integer literals are converted to 64-bit unsigned values; field and enum
   numbers are narrowed to int by their own rules. */
133 %type intLit { uint64_t }
135 %type fieldNumber { int }
136 %type enumNumber { int }
138 /* We don't care about the types of following nodes:
139 syntax import package option enumValueOptions enumValueOption rpcBody streamBody
140 extensions reserved ranges range quoteFieldNames emptyStatement
145 /* v2/v3: proto = syntax { import | package | option | topLevelDef | emptyStatement } */
146 /* Official PBL bugfix: proto = { syntax } { import | package | option | topLevelDef | emptyStatement }
147 The default syntax version is "proto2". */
/* Start symbol: the body, optionally preceded by a 'syntax' statement. */
148 proto ::= wholeProtoBody.
149 proto ::= syntax wholeProtoBody.
/* Once the whole body has been reduced, give the package node its real name and
   register it in the pool's package table. */
151 wholeProtoBody ::= protoBody(B).
153 /* set real package name */
154 pbl_set_node_name(B, state->file->package_name_lineno, state->file->package_name);
155 /* use the memory allocated for the name of the package node */
156 state->file->package_name = pbl_get_node_name(B);
157 /* put this file data into package tables */
158 pbl_node_t* packnode = (pbl_node_t*)g_hash_table_lookup(state->pool->packages, state->file->package_name);
/* NOTE(review): the branch around the next two calls is not visible here -
   presumably the children are merged into an already registered package node when
   the lookup succeeds, and otherwise this node is inserted under a copy of the
   package name; confirm. */
160 pbl_merge_children(packnode, B);
163 g_hash_table_insert(state->pool->packages, g_strdup(state->file->package_name), B);
/* Record the declared syntax version (3 or 2) in the file descriptor; any other
   string is reported and flagged as a grammar error. */
167 /* v2: syntax = "syntax" "=" quote "proto2" quote ";" */
168 /* v3: syntax = "syntax" "=" quote "proto3" quote ";" */
169 syntax ::= PT_SYNTAX PT_ASSIGN strLit(B) PT_SEMICOLON.
171 if (!strcmp(B, "proto3")) {
172 state->file->syntax_version = 3;
173 } else if (!strcmp(B, "proto2")) {
174 state->file->syntax_version = 2;
176 pbl_parser_error(state, "Unrecognized syntax identifier [%s]. This parser only recognizes \"proto3\" or \"proto2\"!", B);
177 state->grammar_error = TRUE;
/* The body accumulates into one PBL_PACKAGE node. Only top-level definitions are
   attached as children; import, package, option and empty statements leave the
   node unchanged. */
181 protoBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_PACKAGE, NAME_TO_BE_SET); } /* create an empty package node */
182 protoBody ::= protoBody import. /* default action is {A = B; } */
183 protoBody ::= protoBody package.
184 protoBody ::= protoBody option.
185 protoBody(A) ::= protoBody(B) topLevelDef(C). { A = B; pbl_add_child(A, C); }
186 protoBody ::= protoBody emptyStatement.
/* All three import forms are handled the same way: the imported file is handed to
   pbl_add_proto_file_to_be_parsed(); 'public' and 'weak' are not distinguished. */
188 /* v2/v3: import = "import" [ "weak" | "public" ] strLit ";" */
189 import ::= PT_IMPORT strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); } /* append file to todo list */
190 import ::= PT_IMPORT PT_PUBLIC strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
191 import ::= PT_IMPORT PT_WEAK strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
/* Remember the package name and the line it was declared on; wholeProtoBody later
   applies them to the package node. */
193 /* v2/v3: package = "package" fullIdent ";" */
194 package ::= PT_PACKAGE exIdent(B) PT_SEMICOLON.
195 { /* The memory of (B) will be freed after parsing, but the package_name will
196 be replaced by the newly allocated name of the package node later */
197 state->file->package_name = B->v;
198 state->file->package_name_lineno = B->ln;
/* Top-level option statements are recognised but have no action, so their names
   and values are not kept. */
201 /* v2/v3: option = "option" optionName "=" constant ";" */
202 /* Official PBL bugfix: option = "option" optionName "=" ( constant | customOptionValue ) ";" */
203 option ::= PT_OPTION optionName PT_ASSIGN constant PT_SEMICOLON.
204 option ::= PT_OPTION optionName PT_ASSIGN customOptionValue PT_SEMICOLON.
/* optionName is built up left to right: each rule reuses the left-hand token and
   replaces its text with a newly concatenated string that is registered through
   pbl_store_string_token(). */
206 /* v2/v3: optionName = ( ident | "(" fullIdent ")" ) { "." ident } */
207 /* Official PBL bugfix: optionName = ( ident | "(" fullIdent ")" ) { "." ( ident | "(" fullIdent ")" ) } */
208 extIdentInParentheses(A) ::= PT_LPAREN exIdent(B) PT_RPAREN.
209 { A = B; A->v = pbl_store_string_token(state, g_strconcat("(", B->v, ")", NULL)); }
210 optionName ::= exIdent.
211 optionName ::= extIdentInParentheses.
212 optionName(A) ::= optionName(B) exIdent(C). // Note that the exIdent contains "."
213 { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, C->v, NULL)); }
214 optionName(A) ::= optionName(B) PT_DOT extIdentInParentheses(C).
215 { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); }
/* NOTE(review): this rule matches no PT_DOT between the two parts, yet a "." is
   still inserted into the joined name - confirm this is intended. */
216 optionName(A) ::= optionName(B) extIdentInParentheses(C).
217 { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); }
219 /* Allow formats which are not defined in the official PBL specification, like:
220 option (google.api.http) = { post: "/v3alpha/kv/put" body: "*" };
221 option (google.api.http) = { post: "/v3alpha/kv/put", body: "*" };
222 option (google.api.http) = { post: "/v3alpha/kv/put" { any format } body: "*" };
224 customOptionValue ::= PT_LCURLY customOptionBody PT_RCURLY.
226 /* The formal EBNF of customOptionBody seems to be */
/* None of the rules below has an action, so the content of a custom option value
   is recognised but not kept. Fields may follow each other directly or be
   separated by a comma or a semicolon. */
228 customOptionBody ::= .
229 customOptionBody ::= customOptionBody optionField.
230 customOptionBody ::= customOptionBody PT_COMMA optionField.
231 customOptionBody ::= customOptionBody PT_SEMICOLON optionField.
/* A field is a name followed by a constant, a nested value (the colon is optional
   in that case) or an array. */
233 optionField ::= optionName PT_COLON constant.
234 optionField ::= optionName PT_COLON customOptionValue.
235 optionField ::= optionName customOptionValue.
236 optionField ::= optionName PT_COLON array.
/* A bracketed, comma-separated list of constants and/or nested values. */
238 array ::= PT_LBRACKET arrayBody PT_RBRACKET.
239 arrayBodyConst ::= constant.
240 arrayBodyConst ::= arrayBody PT_COMMA constant.
241 arrayBodyCustom ::= customOptionValue.
242 arrayBodyCustom ::= arrayBody PT_COMMA customOptionValue.
243 arrayBody ::= arrayBodyConst.
244 arrayBody ::= arrayBodyCustom.
246 /* but for handling unexpected situations, we still use the following EBNF */
/* Catch-all productions: a body may also contain identifiers, string literals,
   integer literals, nested values and punctuation in any order.
   NOTE(review): the empty production below repeats the one already declared for
   customOptionBody with the formal EBNF - confirm the duplicate is harmless. */
247 customOptionBody ::= .
248 customOptionBody ::= customOptionBody exIdent.
249 customOptionBody ::= customOptionBody PT_STRLIT.
250 customOptionBody ::= customOptionBody symbolsWithoutCurly.
251 customOptionBody ::= customOptionBody intLit.
252 customOptionBody ::= customOptionBody customOptionValue.
/* Punctuation and operator tokens accepted inside a custom option body. The curly
   braces are not listed here; nested braces are matched through customOptionValue
   in the rule above. */
254 symbolsWithoutCurly ::= PT_LPAREN.
255 symbolsWithoutCurly ::= PT_RPAREN.
256 symbolsWithoutCurly ::= PT_LBRACKET.
257 symbolsWithoutCurly ::= PT_RBRACKET.
258 symbolsWithoutCurly ::= PT_EQUAL.
259 symbolsWithoutCurly ::= PT_NOTEQUAL.
260 symbolsWithoutCurly ::= PT_NOTEQUAL2.
261 symbolsWithoutCurly ::= PT_GEQUAL.
262 symbolsWithoutCurly ::= PT_LEQUAL.
263 symbolsWithoutCurly ::= PT_ASSIGN_PLUS.
264 symbolsWithoutCurly ::= PT_ASSIGN.
265 symbolsWithoutCurly ::= PT_PLUS.
266 symbolsWithoutCurly ::= PT_MINUS.
267 symbolsWithoutCurly ::= PT_MULTIPLY.
268 symbolsWithoutCurly ::= PT_DIV.
269 symbolsWithoutCurly ::= PT_LOGIC_OR.
270 symbolsWithoutCurly ::= PT_OR.
271 symbolsWithoutCurly ::= PT_LOGIC_AND.
272 symbolsWithoutCurly ::= PT_AND.
273 symbolsWithoutCurly ::= PT_NOT.
274 symbolsWithoutCurly ::= PT_NEG.
275 symbolsWithoutCurly ::= PT_XOR.
276 symbolsWithoutCurly ::= PT_SHL.
277 symbolsWithoutCurly ::= PT_SHR.
278 symbolsWithoutCurly ::= PT_PERCENT.
279 symbolsWithoutCurly ::= PT_DOLLAR.
280 symbolsWithoutCurly ::= PT_COND.
281 symbolsWithoutCurly ::= PT_SEMICOLON.
282 symbolsWithoutCurly ::= PT_DOT.
283 symbolsWithoutCurly ::= PT_COMMA.
284 symbolsWithoutCurly ::= PT_COLON.
285 symbolsWithoutCurly ::= PT_LESS.
286 symbolsWithoutCurly ::= PT_GREATER.
/* The node built by the concrete definition is passed up unchanged.
   NOTE(review): 'extend' reduces to NULL, so protoBody calls pbl_add_child() with
   a NULL child for it - presumably tolerated by that helper; confirm. */
288 /* v2: topLevelDef = message | enum | extend | service */
289 /* v3: topLevelDef = message | enum | service */
290 topLevelDef ::= message.
291 topLevelDef ::= enum.
292 topLevelDef ::= extend. /*v2 only */
293 topLevelDef ::= service.
/* The message node is created by the (initially empty) body with a placeholder
   name; the enclosing rule then sets the real name and line number. */
295 /* v2/v3: message = "message" messageName messageBody */
296 message(A) ::= PT_MESSAGE messageName(B) PT_LCURLY messageBody(C) PT_RCURLY.
297 { A = C; pbl_set_node_name(A, B->ln, B->v); }
/* Fields, enums, nested messages, groups and map fields become children of the
   message node. The children of a oneof are merged into the message and the oneof
   node itself is freed. extend, extensions, option, reserved and empty statements
   do not change the node. */
299 /* v2: messageBody = "{" { field | enum | message | extend | extensions | group | option | oneof | mapField | reserved | emptyStatement } "}" */
300 /* v3: messageBody = "{" { field | enum | message | option | oneof | mapField | reserved | emptyStatement } "}" */
301 messageBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
302 messageBody(A) ::= messageBody(B) field(C). { A = B; pbl_add_child(A, C); }
303 messageBody(A) ::= messageBody(B) enum(C). { A = B; pbl_add_child(A, C); }
304 messageBody(A) ::= messageBody(B) message(C). { A = B; pbl_add_child(A, C); }
305 messageBody ::= messageBody extend. /* v2 only */
306 messageBody ::= messageBody extensions. /* v2 only */
307 messageBody(A) ::= messageBody(B) group(C). /* v2 only */ { A = B; pbl_add_child(A, C); }
308 messageBody ::= messageBody option.
309 messageBody(A) ::= messageBody(B) oneof(C). { A = B; pbl_merge_children(A, C); pbl_free_node(C); }
310 messageBody(A) ::= messageBody(B) mapField(C). { A = B; pbl_add_child(A, C); }
311 messageBody ::= messageBody reserved.
312 messageBody ::= messageBody emptyStatement.
/* The enum node is created by the body with a placeholder name and named here. */
314 /* v2/v3: enum = "enum" enumName enumBody */
315 enum(A) ::= PT_ENUM enumName(B) PT_LCURLY enumBody(C) PT_RCURLY.
316 { A = C; pbl_set_node_name(A, B->ln, B->v); }
/* Only enum values become children; reserved, option and empty statements leave
   the node unchanged. */
318 /* v2/v3: enumBody = "{" { option | enumField | emptyStatement } "}" */
319 /* Official PBL bugfix: enumBody = "{" { reserved | option | enumField | emptyStatement } "}" */
320 enumBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ENUM, NAME_TO_BE_SET); }
321 enumBody ::= enumBody reserved.
322 enumBody ::= enumBody option.
323 enumBody(A) ::= enumBody(B) enumField(C). { A = B; pbl_add_child(A, C); }
324 enumBody ::= enumBody emptyStatement.
/* Value options are parsed but not passed to the value node.
   NOTE(review): the second form has no PT_SEMICOLON - presumably the terminating
   ';' is then consumed as an emptyStatement by enumBody; confirm. */
326 /* v2/v3: enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" */
327 enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C) PT_LBRACKET enumValueOptions PT_RBRACKET PT_SEMICOLON.
328 { A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
329 enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C).
330 { A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
/* NOTE(review): the 64-bit literal is narrowed to int with a plain cast and no
   range check, and the negative form negates the narrowed value - out-of-range
   input is not reported here; confirm that is acceptable. */
332 /* v2/v3: must be in the range of a 32-bit integer. negative values are not recommended. */
333 enumNumber(A) ::= intLit(B). { A = (int)B; }
334 enumNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }
335 enumNumber(A) ::= PT_MINUS intLit(B). { A = -(int)B; }
337 /* v2/v3: enumValueOption { "," enumValueOption } */
338 enumValueOptions ::= enumValueOption.
339 enumValueOptions ::= enumValueOptions PT_COMMA enumValueOption.
341 /* v2/v3: enumValueOption = optionName "=" constant */
342 /* Official PBL bugfix: enumValueOption = optionName "=" ( constant | customOptionValue ) ";" */
343 enumValueOption ::= optionName PT_ASSIGN constant.
344 enumValueOption ::= optionName PT_ASSIGN customOptionValue.
/* The service node is created by the body with a placeholder name and named here. */
346 /* v2: service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}" */
347 /* v3: service = "service" serviceName "{" { option | rpc | emptyStatement } "}" */
348 service(A) ::= PT_SERVICE serviceName(B) PT_LCURLY serviceBody(C) PT_RCURLY.
349 { A = C; pbl_set_node_name(A, B->ln, B->v); }
/* rpc and stream declarations become children of the service node; option and
   empty statements leave it unchanged. */
351 serviceBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_SERVICE, NAME_TO_BE_SET); }
352 serviceBody ::= serviceBody option.
353 serviceBody(A) ::= serviceBody(B) rpc(C). { A = B; pbl_add_child(A, C); }
354 serviceBody ::= serviceBody emptyStatement.
355 serviceBody(A) ::= serviceBody(B) stream(C). /* v2 only */ { A = B; pbl_add_child(A, C); }
/* The method node comes from rpcDecl; an optional body in braces is parsed but
   adds nothing to it. */
357 /* v2/v3: rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" (( "{" {option | emptyStatement } "}" ) | ";") */
358 rpc ::= rpcDecl PT_SEMICOLON.
359 rpc ::= rpcDecl PT_LCURLY rpcBody PT_RCURLY.
/* One rule per combination of the optional "stream" keywords. In each call the
   boolean following a message type is TRUE exactly when that type was preceded
   by PT_STREAM. */
361 /* "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" */
362 rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
363 { A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, FALSE); }
364 rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
365 { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, FALSE); }
366 rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
367 { A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, TRUE); }
368 rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
369 { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }
372 rpcBody ::= rpcBody option.
373 rpcBody ::= rpcBody emptyStatement.
/* A v2 'stream' declaration is stored as a method node with both stream flags
   set to TRUE. */
375 /* v2: stream = "stream" streamName "(" messageType "," messageType ")" (( "{" { option | emptyStatement } "}") | ";" ) */
376 stream ::= streamDecl PT_SEMICOLON.
377 stream ::= streamDecl PT_LCURLY streamBody PT_RCURLY.
380 streamDecl(A) ::= PT_STREAM streamName(B) PT_LPAREN messageType(C) PT_COMMA messageType(D) PT_RPAREN.
381 { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }
385 streamBody ::= streamBody option.
386 streamBody ::= streamBody emptyStatement.
/* Four forms of a field: with or without a label, with or without options. The
   arguments handed to pbl_create_field_node() are, in order: file, line of the
   field name, label (NULL when absent), type, name, number, options node (NULL
   when absent). */
388 /* v2: label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
389 /* v3: field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
390 field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
391 { A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, NULL); }
392 field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
393 { A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, F); }
394 field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
395 { A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, NULL); }
396 field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
397 { A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, F); }
/* The label value is the keyword's own token text. */
399 /* v2: label = "required" | "optional" | "repeated" */
400 label(A) ::= PT_REQUIRED(B). { A = B->v; }
401 label(A) ::= PT_OPTIONAL(B). { A = B->v; }
402 label(A) ::= PT_REPEATED(B). { A = B->v; }
404 /* v2/v3: type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
405 | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
406 | "bool" | "string" | "bytes" | messageType | enumType
408 type(A) ::= exIdent(B). { A = B->v; }
410 /* v2/v3: The smallest field number is 1, and the largest is 2^29 - 1, or 536,870,911. */
/* NOTE(review): the 64-bit literal is narrowed to int with a plain cast; the range
   quoted above is not enforced by these rules. */
411 fieldNumber(A) ::= intLit(B). { A = (int)B; }
412 fieldNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }
/* The first option creates a PBL_OPTIONS container node; every option is added to
   it as a child. */
414 /* v2/v3: fieldOptions = fieldOption { "," fieldOption } */
415 fieldOptions(A) ::= fieldOption(B).
416 { A = pbl_create_node(state->file, CUR_LINENO, PBL_OPTIONS, NEED_NOT_NAME); pbl_add_child(A, B); }
417 fieldOptions(A) ::= fieldOptions(B) PT_COMMA fieldOption(C).
418 { A = B; pbl_add_child(A, C); }
/* An option node stores the option name and its value as text. A custom (brace
   enclosed) value is not preserved: the fixed placeholder string is stored
   instead. */
420 /* v2/v3: fieldOption = optionName "=" constant */
421 /* Official PBL bugfix: fieldOption = optionName "=" ( constant | customOptionValue ) ";" */
422 fieldOption(A) ::= optionName(B) PT_ASSIGN constant(C).
423 { A = pbl_create_option_node(state->file, B->ln, B->v, C); }
424 fieldOption(A) ::= optionName(B) PT_ASSIGN customOptionValue.
425 { A = pbl_create_option_node(state->file, B->ln, B->v, pbl_store_string_token(state, g_strdup("{ ... }"))); }
/* A group reuses the node built by messageBody and only sets its name; the label
   and the field number are parsed but not used by these actions. */
427 /* v2 only: group = label "group" groupName "=" fieldNumber messageBody */
428 /* Official PBL bugfix: there is no label if the 'group' is a member of oneof body */
429 group(A) ::= PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
430 { A = C; pbl_set_node_name(A, B->ln, B->v); }
431 group(A) ::= label PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
432 { A = C; pbl_set_node_name(A, B->ln, B->v); }
434 groupName ::= exIdent.
/* The oneof node is created by the body with a placeholder name and named here. */
436 /* v2/v3: oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}" */
437 /* Official PBL bugfix: oneof = "oneof" oneofName "{" { oneofField | option | group | emptyStatement } "}" */
438 oneof(A) ::= PT_ONEOF oneofName(B) PT_LCURLY oneofBody(C) PT_RCURLY.
439 { A = C; pbl_set_node_name(A, B->ln, B->v); }
/* Only fields become children of the oneof node.
   NOTE(review): the node produced by a 'group' inside a oneof is neither attached
   nor freed by the rule below - confirm it is released elsewhere. */
441 oneofBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ONEOF, NAME_TO_BE_SET); }
442 oneofBody(A) ::= oneofBody(B) oneofField(C). { A = B; pbl_add_child(A, C); }
443 oneofBody ::= oneofBody option.
444 oneofBody ::= oneofBody group.
445 oneofBody ::= oneofBody emptyStatement.
/* Same as an ordinary field, but the label argument is always NULL. */
447 /* v2/v3: oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
448 oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_LBRACKET fieldOptions(E) PT_RBRACKET PT_SEMICOLON.
449 { A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, E); }
450 oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_SEMICOLON.
451 { A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, NULL); }
/* A map field becomes a map-field node with two child fields: "key" with number 1
   and "value" with number 2, typed from the map's key and value types. Both
   children use the line number of the map name. The two rules differ only in the
   optional field options. */
453 /* v2/v3: mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
454 mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
456 A = pbl_create_map_field_node(state->file, D->ln, D->v, E, F);
457 pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
458 pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
460 mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
462 A = pbl_create_map_field_node(state->file, D->ln, D->v, E, NULL);
463 pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
464 pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
/* The key type is taken as written; the list of allowed key types below is not
   checked by this rule. */
467 /* keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
468 "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" */
469 keyType(A) ::= exIdent(B). { A = B->v; }
/* extensions and reserved statements are recognised only; none of the rules in
   this group has an action, so nothing is recorded for them. */
471 /* v2 only: extensions = "extensions" ranges ";" */
472 extensions ::= PT_EXTENSIONS ranges PT_SEMICOLON.
474 /* v2/v3: reserved = "reserved" ( ranges | fieldNames ) ";" */
475 reserved ::= PT_RESERVED ranges PT_SEMICOLON.
476 reserved ::= PT_RESERVED quoteFieldNames PT_SEMICOLON.
478 /* v2/v3: ranges = range { "," range } */
480 ranges ::= ranges PT_COMMA range.
/* The upper bound may be an integer or an identifier, which covers "max". */
482 /* v2/v3: range = intLit [ "to" ( intLit | "max" ) ] */
484 range ::= intLit PT_TO intLit.
485 range ::= intLit PT_TO exIdent.
487 /* v2/v3: fieldNames = fieldName { "," fieldName }
488 Note that there is an error in BNF definition about reserved fieldName. It's strLit, not ident.
490 quoteFieldNames ::= strLit.
491 quoteFieldNames ::= quoteFieldNames PT_COMMA strLit.
493 /* v2/v3: extend = "extend" messageType "{" {field | group | emptyStatement} "}"
494 Note that creating custom options uses extensions, which are permitted only for custom options in proto3.
495 We don't use custom options while parsing packets, so 'extend' is ignored: the node built for its body is freed and the rule yields NULL.
497 extend(A) ::= PT_EXTEND messageType PT_LCURLY extendBody(B) PT_RCURLY.
498 { A = NULL; pbl_free_node(B); }
500 extendBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
501 extendBody(A) ::= extendBody(B) field(C). { A = B; pbl_add_child(A, C); }
502 extendBody(A) ::= extendBody(B) group(C). { A = B; pbl_add_child(A, C); }
503 extendBody ::= extendBody emptyStatement.
505 messageName ::= exIdent.
506 enumName ::= exIdent.
507 streamName ::= exIdent.
508 fieldName ::= exIdent.
509 oneofName ::= exIdent.
511 serviceName ::= exIdent.
514 /* messageType = [ "." ] { ident "." } messageName */
515 messageType(A) ::= exIdent(B). { A = B->v; }
517 /* enumType = [ "." ] { ident "." } enumName */
518 /*enumType ::= exIdent.*/
/* Integer literals are converted with g_ascii_strtoull(). The octal rule skips the
   first character of the token text and the hex rule skips the first two before
   converting - presumably the leading "0" and "0x" prefixes; confirm against the
   scanner. */
520 /* intLit = decimalLit | octalLit | hexLit */
521 intLit(A) ::= PT_DECIMALLIT(B). { A = g_ascii_strtoull(B->v, NULL, 10); }
522 intLit(A) ::= PT_OCTALLIT(B). { A = g_ascii_strtoull(B->v+1, NULL, 8); }
523 intLit(A) ::= PT_HEXLIT(B). { A = g_ascii_strtoull(B->v+2, NULL, 16); }
525 /* emptyStatement = ";" */
526 emptyStatement ::= PT_SEMICOLON.
/* A constant is always carried as text. Integers are printed back in decimal, and
   signed forms get their sign prepended. Every newly built string is registered
   through pbl_store_string_token(). */
528 /* constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit */
529 constant(A) ::= exIdent(B). { A = B->v; } /* boolLit is parsed as exIdent */
531 constant(A) ::= intLit(B). { A = pbl_store_string_token(state, ws_strdup_printf("%" PRIu64, B)); }
532 constant(A) ::= PT_PLUS intLit(B). { A = pbl_store_string_token(state, ws_strdup_printf("%" PRIu64, B)); }
533 constant(A) ::= PT_MINUS intLit(B). { A = pbl_store_string_token(state, ws_strdup_printf("-%" PRIu64, B)); }
534 constant(A) ::= PT_PLUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("+", B->v, NULL)); } /* This covers floatLit. */
535 constant(A) ::= PT_MINUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("-", B->v, NULL)); }
537 exIdent ::= PT_IDENT.
/* The string value is the token text without its first and last character
   (presumably the surrounding quotes). Adjacent string literals are concatenated;
   the temporary copy of the second literal is freed after joining. */
539 strLit(A) ::= PT_STRLIT(B). { A = pbl_store_string_token(state, g_strndup(B->v + 1, strlen(B->v) - 2)); }
540 /* support one string being split across multiple lines */
541 strLit(A) ::= strLit(B) PT_STRLIT(C). { gchar *v = g_strndup(C->v + 1, strlen(C->v) - 2); A = pbl_store_string_token(state, g_strconcat(B, v, NULL)); g_free(v); }
/* Report a parse error through the pool's error callback, or through pbl_printf
 * when no callback is set.
 *
 * yyscanner: scanner used to look up the current line number; when NULL the line
 *            number is set to -1.
 * state:     parser state supplying the file name and the callback; when it (or
 *            its file) is NULL the file is reported as "UNKNOWN".
 * msg:       message text to append to the report.
 *
 * NOTE(review): state->pool is dereferenced without a NULL check whenever state
 * is non-NULL - confirm the pool is always set before an error can be reported.
 */
546 protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg)
549 void(*error_cb)(const char *format, ...);
550 const char* filepath = (state && state->file) ?
551 state->file->filename : "UNKNOWN";
553 error_cb = (state && state->pool->error_cb) ?
554 state->pool->error_cb : pbl_printf;
556 lineno = yyscanner ? protobuf_lang_get_lineno(yyscanner) : -1;
/* NOTE(review): the condition selecting between the two report formats is not
   visible here - presumably the line number is printed only when one was
   obtained; confirm. */
559 error_cb("Protobuf: Parsing file [%s:%d] failed: %s\n", filepath, lineno, msg);
561 error_cb("Protobuf: Parsing file [%s] failed: %s\n", filepath, msg);
/* printf-style error helper: formats the message and forwards it to
 * protobuf_lang_error() together with the state's scanner (NULL when state is
 * NULL).
 *
 * NOTE(review): msg is allocated by ws_strdup_vprintf(); its release is not
 * visible in this excerpt - confirm it is freed after the report.
 */
566 pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...)
572 msg = ws_strdup_vprintf(fmt, ap);
573 scanner = state ? state->scanner : NULL;
574 protobuf_lang_error(scanner, state, msg);
/* Reset a parser state so it can be reused or discarded: clears the error flag
 * and the temporary token, destroys the scanner, frees the lemon parser and both
 * token lists, and detaches the state from the pool.
 *
 * NOTE(review): the first statements of this function and the guard around the
 * final pool assignment are not visible here.
 */
580 pbl_clear_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool)
588 state->grammar_error = FALSE;
589 state->tmp_token = NULL;
591 if (state->scanner) {
592 protobuf_lang_lex_destroy(state->scanner);
593 state->scanner = NULL;
/* The parser object was allocated with g_malloc0 in pbl_reinit_state(), hence
   g_free as the matching release function. */
596 if (state->pParser) {
597 ProtobufLangParserFree(state->pParser, g_free);
598 state->pParser = NULL;
/* Both token lists are freed together with their elements. */
601 if (state->lex_string_tokens) {
602 g_slist_free_full(state->lex_string_tokens, g_free);
603 state->lex_string_tokens = NULL;
606 if (state->lex_struct_tokens) {
607 g_slist_free_full(state->lex_struct_tokens, g_free);
608 state->lex_struct_tokens = NULL;
612 pool->parser_state = NULL;
/* Prepare the state for parsing one file: clear whatever the previous run left
 * behind, look up the file descriptor registered for filepath in
 * pool->proto_files, allocate a fresh lemon parser and make the pool point at
 * this state.
 *
 * NOTE(review): some statements of this function are not visible here;
 * g_hash_table_lookup() returns NULL for an unknown key, so state->file may be
 * NULL afterwards - confirm callers only pass registered paths.
 */
617 pbl_reinit_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool, const char* filepath)
622 pbl_clear_state(state, pool);
625 state->file = (pbl_file_descriptor_t*) g_hash_table_lookup(pool->proto_files, filepath);
626 state->pParser = ProtobufLangParserAlloc(g_malloc0);
629 pool->parser_state = state;
/* Parse every file queued in pool->proto_files_to_be_parsed, one at a time.
 * For each file the state is re-initialised, the file is opened and a scanner is
 * created, tokens are fed to the lemon parser until the input ends or a grammar
 * error is flagged, and the file is then removed from the queue. Import
 * statements queue further files through the grammar actions, so the queue can
 * grow while this loop runs.
 *
 * NOTE(review): the error exits, the file close and the return statement are not
 * visible in this excerpt; the meaning of the returned int cannot be stated from
 * here.
 */
633 int run_pbl_parser(pbl_descriptor_pool_t* pool)
635 protobuf_lang_state_t state = {0};
640 const char* filepath;
642 while (!g_queue_is_empty(pool->proto_files_to_be_parsed)) {
/* Peek rather than pop: the entry is removed only after the file was handled. */
643 filepath = (const char*) g_queue_peek_head(pool->proto_files_to_be_parsed);
644 /* reinit state and scanner */
645 pbl_reinit_state(&state, pool, filepath);
648 /* Note that filepath is absolute path in proto_files */
649 fp = ws_fopen(filepath, "r");
651 pbl_parser_error(&state, "File does not exists!");
656 status = protobuf_lang_lex_init(&scanner);
658 pbl_parser_error(&state, "Initialize Protocol Buffers Language scanner failed!\n");
663 /* associate the parser state with the lexical analyzer state */
664 protobuf_lang_set_extra(&state, scanner);
665 state.scanner = scanner;
667 protobuf_lang_restart(fp, scanner);
668 /* uncomment the next line for debugging */
669 /* ProtobufLangParserTrace(stdout, ">>>"); */
/* Feed tokens until the scanner returns 0 or an error has been flagged. */
670 while (!state.grammar_error && (token_id = protobuf_lang_lex(scanner))) {
671 /* state.tmp_token contains token string value and lineno information */
672 ProtobufLangParser(state.pParser, token_id, state.tmp_token, &state);
676 if (state.grammar_error) {
/* Token id 0 tells the parser that the input has ended. */
680 ProtobufLangParser(state.pParser, 0, NULL, &state);
683 /* remove the parsed file from list */
684 g_queue_pop_head(pool->proto_files_to_be_parsed);
688 pbl_clear_state(&state, pool);
692 } /* end of %code block */