2 /* protobuf_lang_parser.lemon
4 * C Protocol Buffers Language (PBL) Parser (for *.proto files)
5 * Copyright 2020, Huang Qiangxiong <qiangxiong.huang@qq.com>
7 * SPDX-License-Identifier: GPL-2.0-or-later
10 /* This parser is mainly to get MESSAGE, ENUM, and FIELD information from *.proto files.
11 * There are two formats of *.proto files:
12 * 1) Protocol Buffers Version 3 Language Specification:
13 * https://developers.google.com/protocol-buffers/docs/reference/proto3-spec
14 * 2) Protocol Buffers Version 2 Language Specification:
15 * https://developers.google.com/protocol-buffers/docs/reference/proto2-spec
16 * There are some errors in the 'proto', 'option' (value), 'extensions', and 'reserved' (fieldName) definitions on those sites.
17 * This parser is created because Wireshark is mainly implemented in plain ANSI C but the official
18 * Protocol Buffers Language parser is implemented in C++.
27 #include <ws_diag_control.h>
28 #include <wsutil/file_util.h>
29 #include "protobuf_lang_tree.h"
30 #include "protobuf_lang_parser.h"
31 #include "protobuf_lang_scanner_lex.h"
33 #define NAME_TO_BE_SET "<NAME_TO_BE_SET>"
34 #define NEED_NOT_NAME "<NEED_NOT_NAME>"
36 static void *ProtobufLangParserAlloc(void *(*mallocProc)(size_t));
37 static void ProtobufLangParser(void *yyp, int yymajor, protobuf_lang_token_t *yyminor, protobuf_lang_state_t *state);
38 static void ProtobufLangParserFree(void *p, void (*freeProc)(void*));
40 /* Error handling function for parser */
41 void protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg);
43 /* Extended error handling function */
44 void pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...);
46 /* This is only the approximate line number, obtained at the moment a grammar rule is reduced
47 by the parser (lemon). It may be overridden later by the lineno argument of
48 pbl_set_node_name(). */
49 #define CUR_LINENO (protobuf_lang_get_lineno(state->scanner))
52 } /* end of %include */
58 %name ProtobufLangParser
60 %extra_argument { protobuf_lang_state_t *state }
62 %token_type { protobuf_lang_token_t* }
65 /* We manage the memory allocated for token values ourselves */
66 (void) state; /* Mark unused, similar to Q_UNUSED */
67 (void) $$; /* Mark unused, similar to Q_UNUSED */
72 pbl_parser_error(state, "Syntax Error: unexpected token \"%s\"", yyminor->v);
74 pbl_parser_error(state, "Syntax Error: missing token");
76 state->grammar_error = TRUE;
80 pbl_parser_error(state, "Parse Error");
81 state->grammar_error = TRUE;
84 /* Keywords like 'syntax', 'message', etc. can be used as the names of messages, fields or enums.
85 So we tell lemon: "If you are unable to parse this keyword, try treating it as an identifier instead." */
86 %fallback PT_IDENT PT_SYNTAX PT_IMPORT PT_WEAK PT_PUBLIC PT_PACKAGE PT_OPTION PT_REQUIRED PT_OPTIONAL.
87 %fallback PT_IDENT PT_REPEATED PT_ONEOF PT_MAP PT_RESERVED PT_ENUM PT_GROUP PT_EXTEND PT_EXTENSIONS.
88 %fallback PT_IDENT PT_MESSAGE PT_SERVICE PT_RPC PT_STREAM PT_RETURNS PT_TO.
90 %type strLit { gchar* }
91 %type label { gchar* }
93 %type keyType { gchar* }
94 %type messageType { gchar* }
95 %type constant { gchar* }
97 %type exIdent { protobuf_lang_token_t* }
98 %type optionName { protobuf_lang_token_t* }
99 %type messageName { protobuf_lang_token_t* }
100 %type enumName { protobuf_lang_token_t* }
101 %type streamName { protobuf_lang_token_t* }
102 %type fieldName { protobuf_lang_token_t* }
103 %type oneofName { protobuf_lang_token_t* }
104 %type mapName { protobuf_lang_token_t* }
105 %type serviceName { protobuf_lang_token_t* }
106 %type rpcName { protobuf_lang_token_t* }
107 %type groupName { protobuf_lang_token_t* }
109 %type protoBody { pbl_node_t* }
110 %type topLevelDef { pbl_node_t* }
111 %type message { pbl_node_t* }
112 %type messageBody { pbl_node_t* }
113 %type rpc { pbl_node_t* }
114 %type rpcDecl { pbl_node_t* }
115 %type field { pbl_node_t* }
116 %type oneofField { pbl_node_t* }
117 %type enum { pbl_node_t* }
118 %type enumBody { pbl_node_t* }
119 %type enumField { pbl_node_t* }
120 %type service { pbl_node_t* }
121 %type serviceBody { pbl_node_t* }
122 %type stream { pbl_node_t* }
123 %type streamDecl { pbl_node_t* }
124 %type fieldOptions { pbl_node_t* }
125 %type fieldOption { pbl_node_t* }
126 %type oneof { pbl_node_t* }
127 %type oneofBody { pbl_node_t* }
128 %type mapField { pbl_node_t* }
129 %type group { pbl_node_t* }
130 %type extend { pbl_node_t* }
131 %type extendBody { pbl_node_t* }
133 %type intLit { uint64_t }
135 %type fieldNumber { int }
136 %type enumNumber { int }
138 /* We don't care about the types of following nodes:
139 syntax import package option enumValueOptions enumValueOption rpcBody streamBody
140 extensions reserved ranges range quoteFieldNames emptyStatement
145 /* v2/v3: proto = syntax { import | package | option | topLevelDef | emptyStatement } */
146 /* Official PBL bugfix: proto = { syntax } { import | package | option | topLevelDef | emptyStatement }
147 The default syntax version is "proto2". */
148 proto ::= wholeProtoBody.
149 proto ::= syntax wholeProtoBody.
151 wholeProtoBody ::= protoBody(B).
153 /* give the package node its real name and line number, as recorded from the 'package' statement */
154 pbl_set_node_name(B, state->file->package_name_lineno, state->file->package_name);
155 /* from now on use the memory allocated for the package node's name */
156 state->file->package_name = pbl_get_node_name(B);
157 /* put this file's data into the pool's package table, which is keyed by package name */
158 pbl_node_t* packnode = (pbl_node_t*)g_hash_table_lookup(state->pool->packages, state->file->package_name);
160 pbl_merge_children(packnode, B);
163 g_hash_table_insert(state->pool->packages, g_strdup(state->file->package_name), B);
167 /* v2: syntax = "syntax" "=" quote "proto2" quote ";" */
168 /* v3: syntax = "syntax" "=" quote "proto3" quote ";" */
169 syntax ::= PT_SYNTAX PT_ASSIGN strLit(B) PT_SEMICOLON.
171 if (!strcmp(B, "proto3")) {
172 state->file->syntax_version = 3;
173 } else if (!strcmp(B, "proto2")) {
174 state->file->syntax_version = 2;
176 pbl_parser_error(state, "Unrecognized syntax identifier [%s]. This parser only recognizes \"proto3\" or \"proto2\"!", B);
177 state->grammar_error = TRUE;
181 protoBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_PACKAGE, NAME_TO_BE_SET); } /* create an empty package node with a placeholder name */
182 protoBody ::= protoBody import. /* no explicit action: the default action is {A = B; } */
183 protoBody ::= protoBody package.
184 protoBody ::= protoBody option.
185 protoBody(A) ::= protoBody(B) topLevelDef(C). { A = B; pbl_add_child(A, C); } /* attach the top-level definition to the package node */
186 protoBody ::= protoBody emptyStatement.
188 /* v2/v3: import = "import" [ "weak" | "public" ] strLit ";" */
189 import ::= PT_IMPORT strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); } /* append file to todo list */
190 import ::= PT_IMPORT PT_PUBLIC strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
191 import ::= PT_IMPORT PT_WEAK strLit(B) PT_SEMICOLON. { pbl_add_proto_file_to_be_parsed(state->pool, B); }
193 /* v2/v3: package = "package" fullIdent ";" */
194 package ::= PT_PACKAGE exIdent(B) PT_SEMICOLON.
195 { /* The memory of (B) will be freed after parsing, so package_name only borrows it here;
196 it is replaced later by the newly allocated name of the package node. */
197 state->file->package_name = B->v;
198 state->file->package_name_lineno = B->ln;
201 /* v2/v3: option = "option" optionName "=" constant ";" */
202 /* Official PBL bugfix: option = "option" optionName "=" ( constant | customOptionValue ) ";" */
203 option ::= PT_OPTION optionName PT_ASSIGN constant PT_SEMICOLON.
204 option ::= PT_OPTION optionName PT_ASSIGN customOptionValue PT_SEMICOLON.
206 /* v2/v3: optionName = ( ident | "(" fullIdent ")" ) { "." ident } */
207 /* Official PBL bugfix: optionName = ( ident | "(" fullIdent ")" ) { "." ( ident | "(" fullIdent ")" ) } */
208 extIdentInParentheses(A) ::= PT_LPAREN exIdent(B) PT_RPAREN.
209 { A = B; A->v = pbl_store_string_token(state, g_strconcat("(", B->v, ")", NULL)); }
210 optionName ::= exIdent.
211 optionName ::= extIdentInParentheses.
212 optionName(A) ::= optionName(B) exIdent(C). // Note that the exIdent contains "."
213 { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, C->v, NULL)); }
214 optionName(A) ::= optionName(B) PT_DOT extIdentInParentheses(C).
215 { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); }
216 optionName(A) ::= optionName(B) extIdentInParentheses(C).
217 { A = B; A->v = pbl_store_string_token(state, g_strconcat(B->v, ".", C->v, NULL)); }
219 /* Allow formats which are not defined in the official PBL specification, like:
220 option (google.api.http) = { post: "/v3alpha/kv/put" body: "*" };
221 option (google.api.http) = { post: "/v3alpha/kv/put", body: "*" };
222 option (google.api.http) = { post: "/v3alpha/kv/put" { any format } body: "*" };
224 customOptionValue ::= PT_LCURLY customOptionBody PT_RCURLY.
226 /* The formal EBNF of customOptionBody seems to be */
228 customOptionBody ::= .
229 customOptionBody ::= customOptionBody optionField.
230 customOptionBody ::= customOptionBody PT_COMMA optionField.
231 customOptionBody ::= customOptionBody PT_SEMICOLON optionField.
233 optionField ::= optionName PT_COLON constant.
234 optionField ::= optionName PT_COLON customOptionValue.
235 optionField ::= optionName customOptionValue.
236 optionField ::= optionName PT_COLON array.
238 array ::= PT_LBRACKET arrayBody PT_RBRACKET.
239 arrayBodyConst ::= constant.
240 arrayBodyConst ::= arrayBody PT_COMMA constant.
241 arrayBodyCustom ::= customOptionValue.
242 arrayBodyCustom ::= arrayBody PT_COMMA customOptionValue.
243 arrayBody ::= arrayBodyConst.
244 arrayBody ::= arrayBodyCustom.
246 /* but for handling unexpected situations, we still use following EBNF */
247 customOptionBody ::= .
248 customOptionBody ::= customOptionBody exIdent.
249 customOptionBody ::= customOptionBody PT_STRLIT.
250 customOptionBody ::= customOptionBody symbolsWithoutCurly.
251 customOptionBody ::= customOptionBody intLit.
252 customOptionBody ::= customOptionBody customOptionValue.
254 symbolsWithoutCurly ::= PT_LPAREN.
255 symbolsWithoutCurly ::= PT_RPAREN.
256 symbolsWithoutCurly ::= PT_LBRACKET.
257 symbolsWithoutCurly ::= PT_RBRACKET.
258 symbolsWithoutCurly ::= PT_EQUAL.
259 symbolsWithoutCurly ::= PT_NOTEQUAL.
260 symbolsWithoutCurly ::= PT_NOTEQUAL2.
261 symbolsWithoutCurly ::= PT_GEQUAL.
262 symbolsWithoutCurly ::= PT_LEQUAL.
263 symbolsWithoutCurly ::= PT_ASSIGN_PLUS.
264 symbolsWithoutCurly ::= PT_ASSIGN.
265 symbolsWithoutCurly ::= PT_PLUS.
266 symbolsWithoutCurly ::= PT_MINUS.
267 symbolsWithoutCurly ::= PT_MULTIPLY.
268 symbolsWithoutCurly ::= PT_DIV.
269 symbolsWithoutCurly ::= PT_LOGIC_OR.
270 symbolsWithoutCurly ::= PT_OR.
271 symbolsWithoutCurly ::= PT_LOGIC_AND.
272 symbolsWithoutCurly ::= PT_AND.
273 symbolsWithoutCurly ::= PT_NOT.
274 symbolsWithoutCurly ::= PT_NEG.
275 symbolsWithoutCurly ::= PT_XOR.
276 symbolsWithoutCurly ::= PT_SHL.
277 symbolsWithoutCurly ::= PT_SHR.
278 symbolsWithoutCurly ::= PT_PERCENT.
279 symbolsWithoutCurly ::= PT_DOLLAR.
280 symbolsWithoutCurly ::= PT_COND.
281 symbolsWithoutCurly ::= PT_SEMICOLON.
282 symbolsWithoutCurly ::= PT_DOT.
283 symbolsWithoutCurly ::= PT_COMMA.
284 symbolsWithoutCurly ::= PT_COLON.
285 symbolsWithoutCurly ::= PT_LESS.
286 symbolsWithoutCurly ::= PT_GREATER.
288 /* v2: topLevelDef = message | enum | extend | service */
289 /* v3: topLevelDef = message | enum | service */
290 topLevelDef ::= message.
291 topLevelDef ::= enum.
292 topLevelDef ::= extend. /*v2 only */
293 topLevelDef ::= service.
295 /* v2/v3: message = "message" messageName messageBody */
296 message(A) ::= PT_MESSAGE messageName(B) PT_LCURLY messageBody(C) PT_RCURLY.
297 { A = C; pbl_set_node_name(A, B->ln, B->v); }
299 /* v2: messageBody = "{" { field | enum | message | extend | extensions | group | option | oneof | mapField | reserved | emptyStatement } "}" */
300 /* v3: messageBody = "{" { field | enum | message | option | oneof | mapField | reserved | emptyStatement } "}" */
301 messageBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
302 messageBody(A) ::= messageBody(B) field(C). { A = B; pbl_add_child(A, C); }
303 messageBody(A) ::= messageBody(B) enum(C). { A = B; pbl_add_child(A, C); }
304 messageBody(A) ::= messageBody(B) message(C). { A = B; pbl_add_child(A, C); }
305 messageBody ::= messageBody extend. /* v2 only */
306 messageBody ::= messageBody extensions. /* v2 only */
307 messageBody(A) ::= messageBody(B) group(C). /* v2 only */ { A = B; pbl_add_child(A, C); }
308 messageBody ::= messageBody option.
309 messageBody(A) ::= messageBody(B) oneof(C). { A = B; pbl_merge_children(A, C); pbl_free_node(C); }
310 messageBody(A) ::= messageBody(B) mapField(C). { A = B; pbl_add_child(A, C); }
311 messageBody ::= messageBody reserved.
312 messageBody ::= messageBody emptyStatement.
314 /* v2/v3: enum = "enum" enumName enumBody */
315 enum(A) ::= PT_ENUM enumName(B) PT_LCURLY enumBody(C) PT_RCURLY.
316 { A = C; pbl_set_node_name(A, B->ln, B->v); }
318 /* v2/v3: enumBody = "{" { option | enumField | emptyStatement } "}" */
319 /* Official PBL bugfix: enumBody = "{" { reserved | option | enumField | emptyStatement } "}" */
320 enumBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ENUM, NAME_TO_BE_SET); }
321 enumBody ::= enumBody reserved.
322 enumBody ::= enumBody option.
323 enumBody(A) ::= enumBody(B) enumField(C). { A = B; pbl_add_child(A, C); }
324 enumBody ::= enumBody emptyStatement.
326 /* v2/v3: enumField = ident "=" [ "-" ] intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" */
327 enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C) PT_LBRACKET enumValueOptions PT_RBRACKET PT_SEMICOLON.
328 { A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
329 enumField(A) ::= exIdent(B) PT_ASSIGN enumNumber(C).
330 { A = pbl_create_enum_value_node(state->file, B->ln, B->v, C); }
332 /* v2/v3: must be in the range of a 32-bit integer. negative values are not recommended. */
333 enumNumber(A) ::= intLit(B). { A = (int)B; } /* unsigned literal narrowed to the 32-bit enum value */
334 enumNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; } /* an explicit '+' sign changes nothing */
335 enumNumber(A) ::= PT_MINUS intLit(B). { A = (int)(int64_t)(0 - B); } /* negate in unsigned arithmetic first: -(int)B is a signed overflow for 2147483648, i.e. the valid value INT32_MIN */
337 /* v2/v3: enumValueOption { "," enumValueOption } */
338 enumValueOptions ::= enumValueOption.
339 enumValueOptions ::= enumValueOptions PT_COMMA enumValueOption.
341 /* v2/v3: enumValueOption = optionName "=" constant */
342 /* Official PBL bugfix: enumValueOption = optionName "=" ( constant | customOptionValue ) ";" */
343 enumValueOption ::= optionName PT_ASSIGN constant.
344 enumValueOption ::= optionName PT_ASSIGN customOptionValue.
346 /* v2: service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}" */
347 /* v3: service = "service" serviceName "{" { option | rpc | emptyStatement } "}" */
348 service(A) ::= PT_SERVICE serviceName(B) PT_LCURLY serviceBody(C) PT_RCURLY.
349 { A = C; pbl_set_node_name(A, B->ln, B->v); }
351 serviceBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_SERVICE, NAME_TO_BE_SET); }
352 serviceBody ::= serviceBody option.
353 serviceBody(A) ::= serviceBody(B) rpc(C). { A = B; pbl_add_child(A, C); }
354 serviceBody ::= serviceBody emptyStatement.
355 serviceBody(A) ::= serviceBody(B) stream(C). /* v2 only */ { A = B; pbl_add_child(A, C); }
357 /* v2/v3: rpc = "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" (( "{" {option | emptyStatement } "}" ) | ";") */
358 rpc ::= rpcDecl PT_SEMICOLON.
359 rpc ::= rpcDecl PT_LCURLY rpcBody PT_RCURLY.
361 /* "rpc" rpcName "(" [ "stream" ] messageType ")" "returns" "(" [ "stream" ] messageType ")" */
362 rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
363 { A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, FALSE); }
364 rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN messageType(D) PT_RPAREN.
365 { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, FALSE); }
366 rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
367 { A = pbl_create_method_node(state->file, B->ln, B->v, C, FALSE, D, TRUE); }
368 rpcDecl(A) ::= PT_RPC rpcName(B) PT_LPAREN PT_STREAM messageType(C) PT_RPAREN PT_RETURNS PT_LPAREN PT_STREAM messageType(D) PT_RPAREN.
369 { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }
372 rpcBody ::= rpcBody option.
373 rpcBody ::= rpcBody emptyStatement.
375 /* v2: stream = "stream" streamName "(" messageType "," messageType ")" (( "{" { option | emptyStatement } "}") | ";" ) */
376 stream ::= streamDecl PT_SEMICOLON.
377 stream ::= streamDecl PT_LCURLY streamBody PT_RCURLY.
380 streamDecl(A) ::= PT_STREAM streamName(B) PT_LPAREN messageType(C) PT_COMMA messageType(D) PT_RPAREN.
381 { A = pbl_create_method_node(state->file, B->ln, B->v, C, TRUE, D, TRUE); }
385 streamBody ::= streamBody option.
386 streamBody ::= streamBody emptyStatement.
388 /* v2: label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
389 /* v3: field = [ "repeated" ] type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
390 field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
391 { A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, NULL); }
392 field(A) ::= type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
393 { A = pbl_create_field_node(state->file, D->ln, NULL, C, D->v, E, F); }
394 field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
395 { A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, NULL); }
396 field(A) ::= label(B) type(C) fieldName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
397 { A = pbl_create_field_node(state->file, D->ln, B, C, D->v, E, F); }
399 /* v2: label = "required" | "optional" | "repeated" */
400 label(A) ::= PT_REQUIRED(B). { A = B->v; }
401 label(A) ::= PT_OPTIONAL(B). { A = B->v; }
402 label(A) ::= PT_REPEATED(B). { A = B->v; }
404 /* v2/v3: type = "double" | "float" | "int32" | "int64" | "uint32" | "uint64"
405 | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64"
406 | "bool" | "string" | "bytes" | messageType | enumType
408 type(A) ::= exIdent(B). { A = B->v; }
410 /* v2/v3: The smallest field number is 1, and the largest is 2^29 - 1, or 536,870,911. */
411 fieldNumber(A) ::= intLit(B). { A = (int)B; }
412 fieldNumber(A) ::= PT_PLUS intLit(B). { A = (int)B; }
414 /* v2/v3: fieldOptions = fieldOption { "," fieldOption } */
415 fieldOptions(A) ::= fieldOption(B).
416 { A = pbl_create_node(state->file, CUR_LINENO, PBL_OPTIONS, NEED_NOT_NAME); pbl_add_child(A, B); }
417 fieldOptions(A) ::= fieldOptions(B) PT_COMMA fieldOption(C).
418 { A = B; pbl_add_child(A, C); }
420 /* v2/v3: fieldOption = optionName "=" constant */
421 /* Official PBL bugfix: fieldOption = optionName "=" ( constant | customOptionValue ) ";" */
422 fieldOption(A) ::= optionName(B) PT_ASSIGN constant(C).
423 { A = pbl_create_option_node(state->file, B->ln, B->v, C); }
424 fieldOption(A) ::= optionName(B) PT_ASSIGN customOptionValue.
425 { A = pbl_create_option_node(state->file, B->ln, B->v, pbl_store_string_token(state, g_strdup("{ ... }"))); }
427 /* v2 only: group = label "group" groupName "=" fieldNumber messageBody */
428 /* Official PBL bugfix: there is no label if the 'group' is a member of oneof body */
429 group(A) ::= PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
430 { A = C; pbl_set_node_name(A, B->ln, B->v); }
431 group(A) ::= label PT_GROUP groupName(B) PT_ASSIGN fieldNumber PT_LCURLY messageBody(C) PT_RCURLY.
432 { A = C; pbl_set_node_name(A, B->ln, B->v); }
434 groupName ::= exIdent.
436 /* v2/v3: oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}" */
437 /* Official PBL bugfix: oneof = "oneof" oneofName "{" { oneofField | option | group | emptyStatement } "}" */
438 oneof(A) ::= PT_ONEOF oneofName(B) PT_LCURLY oneofBody(C) PT_RCURLY.
439 { A = C; pbl_set_node_name(A, B->ln, B->v); }
441 oneofBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_ONEOF, NAME_TO_BE_SET); }
442 oneofBody(A) ::= oneofBody(B) oneofField(C). { A = B; pbl_add_child(A, C); }
443 oneofBody ::= oneofBody option.
444 oneofBody ::= oneofBody group.
445 oneofBody ::= oneofBody emptyStatement.
447 /* v2/v3: oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
448 oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_LBRACKET fieldOptions(E) PT_RBRACKET PT_SEMICOLON.
449 { A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, E); }
450 oneofField(A) ::= type(B) fieldName(C) PT_ASSIGN fieldNumber(D) PT_SEMICOLON.
451 { A = pbl_create_field_node(state->file, C->ln, NULL, B, C->v, D, NULL); }
453 /* v2/v3: mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";" */
454 mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_LBRACKET fieldOptions(F) PT_RBRACKET PT_SEMICOLON.
456 A = pbl_create_map_field_node(state->file, D->ln, D->v, E, F);
457 pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
458 pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
460 mapField(A) ::= PT_MAP PT_LESS keyType(B) PT_COMMA type(C) PT_GREATER mapName(D) PT_ASSIGN fieldNumber(E) PT_SEMICOLON.
462 A = pbl_create_map_field_node(state->file, D->ln, D->v, E, NULL);
463 pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, B, "key", 1, NULL)); /* add key field */
464 pbl_add_child(A, pbl_create_field_node(state->file, D->ln, NULL, C, "value", 2, NULL)); /* add value field */
467 /* keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
468 "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" */
469 keyType(A) ::= exIdent(B). { A = B->v; }
471 /* v2 only: Allow extension declarations which are not defined in official PBL specification, like:
472 message UserContent {
473 extensions 100 to 199 [
476 full_name: ".kittens.kitten_videos",
477 type: ".kittens.Video",
480 // Ensures all field numbers in this extension range are declarations.
481 verification = DECLARATION
485 For examples and explanations see:
486 1) Extension Declarations Guide:
487 https://protobuf.dev/programming-guides/extension_declarations/
488 2) Extensions section of the Protocol Buffers Version 2 Language Specification:
489 https://protobuf.dev/programming-guides/proto2/#ext-example
492 /* v2 only: extensions = "extensions" ranges [ "[" declarations "]" ] ";" */
493 extensions ::= PT_EXTENSIONS ranges PT_SEMICOLON.
494 extensions ::= PT_EXTENSIONS ranges PT_LBRACKET declarations PT_RBRACKET PT_SEMICOLON.
496 /* v2 only: declarations = declaration { , declaration } */
497 declarations ::= declaration.
498 declarations ::= declarations PT_COMMA declaration.
500 /* v2 only: declaration = optionName "=" ( constant | customOptionValue ) */
501 /* Simply reuse constant|customOptionValue, we don't care about the content, we just need to support the syntax generally. */
502 declaration ::= optionName PT_ASSIGN constant.
503 declaration ::= optionName PT_ASSIGN customOptionValue.
505 /* v2/v3: reserved = "reserved" ( ranges | fieldNames ) ";" */
506 reserved ::= PT_RESERVED ranges PT_SEMICOLON.
507 reserved ::= PT_RESERVED quoteFieldNames PT_SEMICOLON.
509 /* v2/v3: ranges = range { "," range } */
511 ranges ::= ranges PT_COMMA range.
513 /* v2/v3: range = intLit [ "to" ( intLit | "max" ) ] */
515 range ::= intLit PT_TO intLit.
516 range ::= intLit PT_TO exIdent.
518 /* v2/v3: fieldNames = fieldName { "," fieldName }
519 Note that there is an error in BNF definition about reserved fieldName. It's strLit, not ident.
521 quoteFieldNames ::= strLit.
522 quoteFieldNames ::= quoteFieldNames PT_COMMA strLit.
524 /* v2/v3: extend = "extend" messageType "{" {field | group | emptyStatement} "}"
525 Note that creating custom options uses extensions, which are permitted only for custom options in proto3.
526 We don't use custom options while parsing packet, so we just ignore the 'extend'.
528 extend(A) ::= PT_EXTEND messageType PT_LCURLY extendBody(B) PT_RCURLY.
529 { A = NULL; pbl_free_node(B); }
531 extendBody(A) ::= . { A = pbl_create_node(state->file, CUR_LINENO, PBL_MESSAGE, NAME_TO_BE_SET); }
532 extendBody(A) ::= extendBody(B) field(C). { A = B; pbl_add_child(A, C); }
533 extendBody(A) ::= extendBody(B) group(C). { A = B; pbl_add_child(A, C); }
534 extendBody ::= extendBody emptyStatement.
536 messageName ::= exIdent.
537 enumName ::= exIdent.
538 streamName ::= exIdent.
539 fieldName ::= exIdent.
540 oneofName ::= exIdent.
542 serviceName ::= exIdent.
545 /* messageType = [ "." ] { ident "." } messageName */
546 messageType(A) ::= exIdent(B). { A = B->v; }
548 /* enumType = [ "." ] { ident "." } enumName */
549 /*enumType ::= exIdent.*/
551 /* intLit = decimalLit | octalLit | hexLit */
552 intLit(A) ::= PT_DECIMALLIT(B). { A = g_ascii_strtoull(B->v, NULL, 10); }
553 intLit(A) ::= PT_OCTALLIT(B). { A = g_ascii_strtoull(B->v+1, NULL, 8); }
554 intLit(A) ::= PT_HEXLIT(B). { A = g_ascii_strtoull(B->v+2, NULL, 16); }
556 /* emptyStatement = ";" */
557 emptyStatement ::= PT_SEMICOLON.
559 /* constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit */
560 constant(A) ::= exIdent(B). { A = B->v; } /* boolLit is parsed as exIdent */
562 constant(A) ::= intLit(B). { A = pbl_store_string_token(state, ws_strdup_printf("%" PRIu64, B)); }
563 constant(A) ::= PT_PLUS intLit(B). { A = pbl_store_string_token(state, ws_strdup_printf("%" PRIu64, B)); }
564 constant(A) ::= PT_MINUS intLit(B). { A = pbl_store_string_token(state, ws_strdup_printf("-%" PRIu64, B)); }
565 constant(A) ::= PT_PLUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("+", B->v, NULL)); } /* This cover floatLit. */
566 constant(A) ::= PT_MINUS exIdent(B). { A = pbl_store_string_token(state, g_strconcat("-", B->v, NULL)); }
568 exIdent ::= PT_IDENT.
570 strLit(A) ::= PT_STRLIT(B). { A = pbl_store_string_token(state, g_strndup(B->v + 1, strlen(B->v) - 2)); }
571 /* support a single string literal that is split across multiple lines */
572 strLit(A) ::= strLit(B) PT_STRLIT(C). { gchar *v = g_strndup(C->v + 1, strlen(C->v) - 2); A = pbl_store_string_token(state, g_strconcat(B, v, NULL)); g_free(v); }
577 protobuf_lang_error(void* yyscanner, protobuf_lang_state_t *state, const char *msg)
580 void(*error_cb)(const char *format, ...);
581 const char* filepath = (state && state->file) ?
582 state->file->filename : "UNKNOWN";
584 error_cb = (state && state->pool->error_cb) ?
585 state->pool->error_cb : pbl_printf;
587 lineno = yyscanner ? protobuf_lang_get_lineno(yyscanner) : -1;
590 error_cb("Protobuf: Parsing file [%s:%d] failed: %s\n", filepath, lineno, msg);
592 error_cb("Protobuf: Parsing file [%s] failed: %s\n", filepath, msg);
597 pbl_parser_error(protobuf_lang_state_t *state, const char *fmt, ...)
603 msg = ws_strdup_vprintf(fmt, ap);
604 scanner = state ? state->scanner : NULL;
605 protobuf_lang_error(scanner, state, msg);
611 pbl_clear_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool)
619 state->grammar_error = FALSE;
620 state->tmp_token = NULL;
622 if (state->scanner) {
623 protobuf_lang_lex_destroy(state->scanner);
624 state->scanner = NULL;
627 if (state->pParser) {
628 ProtobufLangParserFree(state->pParser, g_free);
629 state->pParser = NULL;
632 if (state->lex_string_tokens) {
633 g_slist_free_full(state->lex_string_tokens, g_free);
634 state->lex_string_tokens = NULL;
637 if (state->lex_struct_tokens) {
638 g_slist_free_full(state->lex_struct_tokens, g_free);
639 state->lex_struct_tokens = NULL;
643 pool->parser_state = NULL;
648 pbl_reinit_state(protobuf_lang_state_t *state, pbl_descriptor_pool_t* pool, const char* filepath)
653 pbl_clear_state(state, pool);
656 state->file = (pbl_file_descriptor_t*) g_hash_table_lookup(pool->proto_files, filepath);
657 state->pParser = ProtobufLangParserAlloc(g_malloc0);
660 pool->parser_state = state;
664 int run_pbl_parser(pbl_descriptor_pool_t* pool)
666 protobuf_lang_state_t state = {0};
671 const char* filepath;
673 while (!g_queue_is_empty(pool->proto_files_to_be_parsed)) {
674 filepath = (const char*) g_queue_peek_head(pool->proto_files_to_be_parsed);
675 /* reinit state and scanner */
676 pbl_reinit_state(&state, pool, filepath);
679 /* Note that filepath is absolute path in proto_files */
680 fp = ws_fopen(filepath, "r");
682 pbl_parser_error(&state, "File does not exists!");
687 status = protobuf_lang_lex_init(&scanner);
689 pbl_parser_error(&state, "Initialize Protocol Buffers Language scanner failed!\n");
694 /* associate the parser state with the lexical analyzer state */
695 protobuf_lang_set_extra(&state, scanner);
696 state.scanner = scanner;
698 protobuf_lang_restart(fp, scanner);
699 /* uncomment the next line for debugging */
700 /* ProtobufLangParserTrace(stdout, ">>>"); */
701 while (!state.grammar_error && (token_id = protobuf_lang_lex(scanner))) {
702 /* state.tmp_token contains token string value and lineno information */
703 ProtobufLangParser(state.pParser, token_id, state.tmp_token, &state);
707 if (state.grammar_error) {
711 ProtobufLangParser(state.pParser, 0, NULL, &state);
714 /* remove the parsed file from list */
715 g_queue_pop_head(pool->proto_files_to_be_parsed);
719 pbl_clear_state(&state, pool);
723 } /* end of %code block */