1 /* tc-bpf.c -- Assembler for the Linux eBPF.
2 Copyright (C) 2019-2023 Free Software Foundation, Inc.
3 Contributed by Oracle, Inc.
5 This file is part of GAS, the GNU Assembler.
7 GAS is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GAS is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GAS; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street - Fifth Floor,
20 Boston, MA 02110-1301, USA. */
25 #include "opcodes/bpf-desc.h"
26 #include "opcodes/bpf-opc.h"
28 #include "elf/common.h"
30 #include "dwarf2dbg.h"
33 const char comment_chars
[] = ";";
34 const char line_comment_chars
[] = "#";
35 const char line_separator_chars
[] = "`";
36 const char EXP_CHARS
[] = "eE";
37 const char FLT_CHARS
[] = "fFdD";
39 static const char *invalid_expression
;
40 static char pseudoc_lex
[256];
41 static const char symbol_chars
[] =
42 "_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
44 static const char arithm_op
[] = "+-/<>%&|^";
46 static void init_pseudoc_lex (void);
48 #define LEX_IS_SYMBOL_COMPONENT 1
49 #define LEX_IS_WHITESPACE 2
50 #define LEX_IS_NEWLINE 3
51 #define LEX_IS_ARITHM_OP 4
53 #define LEX_IS_CLSE_BR 7
54 #define LEX_IS_OPEN_BR 8
55 #define LEX_IS_EQUAL 9
56 #define LEX_IS_EXCLA 10
59 #define MAX_TOKEN_SZ 100
61 /* Like s_lcomm_internal in gas/read.c but the alignment string
62 is allowed to be optional. */
65 pe_lcomm_internal (int needs_align
, symbolS
*symbolP
, addressT size
)
72 && *input_line_pointer
== ',')
74 align
= parse_align (needs_align
- 1);
76 if (align
== (addressT
) -1)
91 bss_alloc (symbolP
, size
, align
);
96 pe_lcomm (int needs_align
)
98 s_comm_internal (needs_align
* 2, pe_lcomm_internal
);
101 /* The target specific pseudo-ops which we support. */
102 const pseudo_typeS md_pseudo_table
[] =
106 { "dword", cons
, 8 },
107 { "lcomm", pe_lcomm
, 1 },
114 static CGEN_BITSET
*bpf_isa
;
118 /* Command-line options processing. */
122 OPTION_LITTLE_ENDIAN
= OPTION_MD_BASE
,
127 struct option md_longopts
[] =
129 { "EL", no_argument
, NULL
, OPTION_LITTLE_ENDIAN
},
130 { "EB", no_argument
, NULL
, OPTION_BIG_ENDIAN
},
131 { "mxbpf", no_argument
, NULL
, OPTION_XBPF
},
132 { NULL
, no_argument
, NULL
, 0 },
135 size_t md_longopts_size
= sizeof (md_longopts
);
137 const char * md_shortopts
= "";
139 extern int target_big_endian
;
141 /* Whether target_big_endian has been set while parsing command-line
143 static int set_target_endian
= 0;
145 static int target_xbpf
= 0;
147 static int set_xbpf
= 0;
150 md_parse_option (int c
, const char * arg ATTRIBUTE_UNUSED
)
154 case OPTION_BIG_ENDIAN
:
155 set_target_endian
= 1;
156 target_big_endian
= 1;
158 case OPTION_LITTLE_ENDIAN
:
159 set_target_endian
= 1;
160 target_big_endian
= 0;
174 md_show_usage (FILE * stream
)
176 fprintf (stream
, _("\nBPF options:\n"));
177 fprintf (stream
, _("\
178 --EL generate code for a little endian machine\n\
179 --EB generate code for a big endian machine\n\
180 -mxbpf generate xBPF instructions\n"));
186 init_pseudoc_lex (void)
190 for (p
= symbol_chars
; *p
; ++p
)
191 pseudoc_lex
[(unsigned char) *p
] = LEX_IS_SYMBOL_COMPONENT
;
193 pseudoc_lex
[' '] = LEX_IS_WHITESPACE
;
194 pseudoc_lex
['\t'] = LEX_IS_WHITESPACE
;
195 pseudoc_lex
['\r'] = LEX_IS_WHITESPACE
;
196 pseudoc_lex
['\n'] = LEX_IS_NEWLINE
;
197 pseudoc_lex
['*'] = LEX_IS_STAR
;
198 pseudoc_lex
[')'] = LEX_IS_CLSE_BR
;
199 pseudoc_lex
['('] = LEX_IS_OPEN_BR
;
200 pseudoc_lex
[']'] = LEX_IS_CLSE_BR
;
201 pseudoc_lex
['['] = LEX_IS_OPEN_BR
;
203 for (p
= arithm_op
; *p
; ++p
)
204 pseudoc_lex
[(unsigned char) *p
] = LEX_IS_ARITHM_OP
;
206 pseudoc_lex
['='] = LEX_IS_EQUAL
;
207 pseudoc_lex
['!'] = LEX_IS_EXCLA
;
213 /* Initialize the `cgen' interface. */
215 /* If not specified in the command line, use the host
217 if (!set_target_endian
)
219 #ifdef WORDS_BIGENDIAN
220 target_big_endian
= 1;
222 target_big_endian
= 0;
226 /* If not specified in the command line, use eBPF rather
231 /* Set the ISA, which depends on the target endianness. */
232 bpf_isa
= cgen_bitset_create (ISA_MAX
);
233 if (target_big_endian
)
236 cgen_bitset_set (bpf_isa
, ISA_XBPFBE
);
238 cgen_bitset_set (bpf_isa
, ISA_EBPFBE
);
243 cgen_bitset_set (bpf_isa
, ISA_XBPFLE
);
245 cgen_bitset_set (bpf_isa
, ISA_EBPFLE
);
248 /* Ensure that lines can begin with '*' in BPF store pseudoc instruction. */
249 lex_type
['*'] |= LEX_BEGIN_NAME
;
251 /* Set the machine number and endian. */
252 gas_cgen_cpu_desc
= bpf_cgen_cpu_open (CGEN_CPU_OPEN_ENDIAN
,
254 CGEN_ENDIAN_BIG
: CGEN_ENDIAN_LITTLE
,
255 CGEN_CPU_OPEN_INSN_ENDIAN
,
260 bpf_cgen_init_asm (gas_cgen_cpu_desc
);
262 /* This is a callback from cgen to gas to parse operands. */
263 cgen_set_parse_operand_fn (gas_cgen_cpu_desc
, gas_cgen_parse_operand
);
265 /* Set the machine type. */
266 bfd_default_set_arch_mach (stdoutput
, bfd_arch_bpf
, bfd_mach_bpf
);
271 md_section_align (segT segment
, valueT size
)
273 int align
= bfd_section_alignment (segment
);
275 return ((size
+ (1 << align
) - 1) & -(1 << align
));
279 /* Functions concerning relocs. */
281 /* The location from which a PC relative jump should be calculated,
282 given a PC relative reloc. */
285 md_pcrel_from_section (fixS
*fixP
, segT sec
)
287 if (fixP
->fx_addsy
!= (symbolS
*) NULL
288 && (! S_IS_DEFINED (fixP
->fx_addsy
)
289 || (S_GET_SEGMENT (fixP
->fx_addsy
) != sec
)
290 || S_IS_EXTERNAL (fixP
->fx_addsy
)
291 || S_IS_WEAK (fixP
->fx_addsy
)))
293 /* The symbol is undefined (or is defined but not in this section).
294 Let the linker figure it out. */
298 return fixP
->fx_where
+ fixP
->fx_frag
->fr_address
;
301 /* Write a value out to the object file, using the appropriate endianness. */
304 md_number_to_chars (char * buf
, valueT val
, int n
)
306 if (target_big_endian
)
307 number_to_chars_bigendian (buf
, val
, n
);
309 number_to_chars_littleendian (buf
, val
, n
);
313 tc_gen_reloc (asection
*sec
, fixS
*fix
)
315 return gas_cgen_tc_gen_reloc (sec
, fix
);
318 /* Return the bfd reloc type for OPERAND of INSN at fixup FIXP. This
319 is called when the operand is an expression that couldn't be fully
320 resolved. Returns BFD_RELOC_NONE if no reloc type can be found.
321 *FIXP may be modified if desired. */
323 bfd_reloc_code_real_type
324 md_cgen_lookup_reloc (const CGEN_INSN
*insn ATTRIBUTE_UNUSED
,
325 const CGEN_OPERAND
*operand
,
328 switch (operand
->type
)
330 case BPF_OPERAND_IMM64
:
331 return BFD_RELOC_BPF_64
;
332 case BPF_OPERAND_DISP32
:
334 return BFD_RELOC_BPF_DISP32
;
338 return BFD_RELOC_NONE
;
341 /* *FRAGP has been relaxed to its final size, and now needs to have
342 the bytes inside it modified to conform to the new size.
344 Called after relaxation is finished.
345 fragP->fr_type == rs_machine_dependent.
346 fragP->fr_subtype is the subtype of what the address relaxed to. */
349 md_convert_frag (bfd
*abfd ATTRIBUTE_UNUSED
,
350 segT sec ATTRIBUTE_UNUSED
,
351 fragS
*fragP ATTRIBUTE_UNUSED
)
353 as_fatal (_("convert_frag called"));
357 md_estimate_size_before_relax (fragS
*fragP ATTRIBUTE_UNUSED
,
358 segT segment ATTRIBUTE_UNUSED
)
360 as_fatal (_("estimate_size_before_relax called"));
366 md_apply_fix (fixS
*fixP
, valueT
*valP
, segT seg
)
368 /* Some fixups for instructions require special attention. This is
369 handled in the code block below. */
370 if ((int) fixP
->fx_r_type
>= (int) BFD_RELOC_UNUSED
)
372 int opindex
= (int) fixP
->fx_r_type
- (int) BFD_RELOC_UNUSED
;
373 const CGEN_OPERAND
*operand
= cgen_operand_lookup_by_num (gas_cgen_cpu_desc
,
377 switch (operand
->type
)
379 case BPF_OPERAND_DISP32
:
380 /* eBPF supports two kinds of CALL instructions: the so
381 called pseudo calls ("bpf to bpf") and external calls
384 Both kinds of calls use the same instruction (CALL).
385 However, external calls are constructed by passing a
386 constant argument to the instruction, whereas pseudo
387 calls result from expressions involving symbols. In
388 practice, instructions requiring a fixup are interpreted
389 as pseudo-calls. If we are executing this code, this is
392 The kernel expects pseudo-calls to be annotated by
393 having BPF_PSEUDO_CALL in the SRC field of the
394 instruction. But beware the infamous nibble-swapping of
395 eBPF and take endianness into account here.
397 Note that the CALL instruction has only one operand, so
398 this code is executed only once per instruction. */
399 where
= fixP
->fx_frag
->fr_literal
+ fixP
->fx_where
+ 1;
400 where
[0] = target_big_endian
? 0x01 : 0x10;
402 case BPF_OPERAND_DISP16
:
403 /* The PC-relative displacement fields in jump instructions
404 shouldn't be in bytes. Instead, they hold the number of
405 64-bit words to the target, _minus one_. */
406 *valP
= (((long) (*valP
)) - 8) / 8;
413 /* And now invoke CGEN's handler, which will eventually install
414 *valP into the corresponding operand. */
415 gas_cgen_md_apply_fix (fixP
, valP
, seg
);
419 The BPF pseudo grammar:
421 instruction : bpf_alu_insn
424 | bpf_load_store_insn
425 | bpf_load_store32_insn
426 | bpf_non_generic_load
427 | bpf_endianness_conv_insn
428 | bpf_64_imm_load_insn
432 bpf_alu_insn : BPF_REG bpf_alu_operator register_or_imm32
435 bpf_alu32_insn : BPF_REG32 bpf_alu_operator register32_or_imm32
438 bpf_jump_insn : BPF_JA offset
439 | IF BPF_REG bpf_jump_operator register_or_imm32 BPF_JA offset
440 | IF BPF_REG32 bpf_jump_operator register_or_imm32 BPF_JA offset
445 bpf_load_store_insn : BPF_REG CHR_EQUAL bpf_size_cast BPF_CHR_OPEN_BR \
446 register_and_offset BPF_CHR_CLSE_BR
447 | bpf_size_cast register_and_offset CHR_EQUAL BPF_REG
450 bpf_load_store32_insn : BPF_REG CHR_EQUAL bpf_size_cast BPF_CHR_OPEN_BR \
451 register32_and_offset BPF_CHR_CLSE_BR
452 | bpf_size_cast register_and_offset CHR_EQUAL BPF_REG32
455 bpf_non_generic_load : BPF_REG_R0 CHR_EQUAL bpf_size_cast BPF_LD BPF_CHR_OPEN_BR \
456 imm32 BPF_CHR_CLSE_BR
459 bpf_endianness_conv_insn : BPF_REG_N bpf_endianness_mnem BPF_REG_N
462 bpf_64_imm_load_insn : BPF_REG imm64 BPF_LL
465 bpf_atomic_insn : BPF_LOCK bpf_size_cast_32_64 register_and_offset BPF_ADD BPF_REG
467 register_and_offset : BPF_CHR_OPEN_BR BPF_REG offset BPF_CHR_CLSE_BR
470 register32_and_offset : BPF_CHR_OPEN_BR BPF_REG32 offset BPF_CHR_CLSE_BR
473 bpf_size_cast : CHR_STAR BPF_CHR_OPEN_BR bpf_size CHR_STAR BPF_CHR_CLSE_BR
476 bpf_size_cast_32_64 : CHR_STAR BPF_CHR_OPEN_BR bpf_size_32_64 CHR_STAR BPF_CHR_CLSE_BR
479 bpf_size_32_64 : BPF_CAST_U32
483 bpf_size : BPF_CAST_U8
489 bpf_jump_operator : BPF_JEQ
501 bpf_alu_operator : BPF_ADD
516 bpf_endianness_mnem : BPF_LE16
528 register_or_imm32 : BPF_REG
532 register32_or_imm32 : BPF_REG32
544 register_or_expression : BPF_EXPR
554 /* Keep grouped to quickly access. */
574 /* Keep grouped to quickly access. */
591 /* Keep grouped to quickly access. */
597 /* Keep grouped to quickly access. */
615 valid_expr (const char *e
, const char **end_expr
)
617 invalid_expression
= NULL
;
618 char *hold
= input_line_pointer
;
621 input_line_pointer
= (char *) e
;
622 deferred_expression (&exp
);
623 *end_expr
= input_line_pointer
;
624 input_line_pointer
= hold
;
626 return invalid_expression
== NULL
;
630 build_bpf_non_generic_load (char *src
, enum bpf_token_type cast
,
634 static const char *cast_rw
[] = {"b", "h", "w", "dw"};
636 bpf_insn
= xasprintf ("%s%s%s %s%s%s%s",
639 cast_rw
[cast
- BPF_CAST_U8
],
648 build_bpf_atomic_insn (char *dst
, char *src
,
649 enum bpf_token_type atomic_insn
,
650 enum bpf_token_type cast
,
654 static const char *cast_rw
[] = {"w", "dw"};
655 static const char *mnem
[] = {"xadd"};
657 bpf_insn
= xasprintf ("%s%s [%%%s%s%s],%%%s", mnem
[atomic_insn
- BPF_ADD
],
658 cast_rw
[cast
- BPF_CAST_U32
], dst
,
659 *offset
!= '+' ? "+" : "",
665 build_bpf_jmp_insn (char *dst
, char *src
,
666 char *imm32
, enum bpf_token_type op
,
667 const char *sym
, const char *offset
)
670 static const char *mnem
[] =
672 "jeq", "jgt", "jge", "jlt",
673 "jle", "jset", "jne", "jsgt",
674 "jsge", "jslt", "jsle"
677 const char *in32
= (*dst
== 'w' ? "32" : "");
683 bpf_insn
= xasprintf ("%s%s %%%s,%s%s,%s",
684 mnem
[op
- BPF_JEQ
], in32
, dst
,
687 offset
? offset
: sym
);
692 build_bpf_arithm_insn (char *dst
, char *src
,
693 int load64
, const char *imm32
,
694 enum bpf_token_type type
)
697 static const char *mnem
[] =
699 "add", "sub", "mul", "div",
700 "or", "and", "lsh", "rsh",
701 "mod", "xor", "mov", "arsh",
704 const char *in32
= (*dst
== 'w' ? "32" : "");
711 bpf_insn
= xasprintf ("%s%s %%%s", mnem
[type
- BPF_ADD
], in32
, dst
);
713 bpf_insn
= xasprintf ("%s %%%s,%s", "lddw", dst
, imm32
);
715 bpf_insn
= xasprintf ("%s%s %%%s,%s%s", mnem
[type
- BPF_ADD
],
723 build_bpf_endianness (char *dst
, enum bpf_token_type endianness
)
726 static const char *size
[] = {"16", "32", "64"};
729 if (endianness
== BPF_LE16
730 || endianness
== BPF_LE32
731 || endianness
== BPF_LE64
)
734 gas_assert (endianness
== BPF_BE16
|| endianness
== BPF_BE32
|| endianness
== BPF_BE64
);
736 bpf_insn
= xasprintf ("%s %%%s,%s", be
? "endbe" : "endle",
737 dst
, be
? size
[endianness
- BPF_BE16
] : size
[endianness
- BPF_LE16
]);
742 build_bpf_load_store_insn (char *dst
, char *src
,
743 enum bpf_token_type cast
,
744 const char *offset
, int isload
)
747 static const char *cast_rw
[] = {"b", "h", "w", "dw"};
751 bpf_insn
= xasprintf ("%s%s %%%s,[%%%s%s%s]", "ldx",
752 cast_rw
[cast
- BPF_CAST_U8
], dst
, src
,
753 *offset
!= '+' ? "+" : "",
756 bpf_insn
= xasprintf ("%s%s [%%%s%s%s],%%%s", "stx",
757 cast_rw
[cast
- BPF_CAST_U8
], dst
,
758 *offset
!= '+' ? "+" : "",
764 look_for_reserved_word (const char *token
, enum bpf_token_type
*type
)
770 enum bpf_token_type type
;
826 for (i
= 0; reserved_words
[i
].name
; ++i
)
827 if (*reserved_words
[i
].name
== *token
828 && !strcmp (reserved_words
[i
].name
, token
))
830 *type
= reserved_words
[i
].type
;
838 is_register (const char *token
, int len
)
840 if (token
[0] == 'r' || token
[0] == 'w')
841 if ((len
== 2 && isdigit (token
[1]))
842 || (len
== 3 && token
[1] == '1' && token
[2] == '0'))
848 static enum bpf_token_type
849 is_cast (const char *token
)
851 static const char *cast_rw
[] = {"u8", "u16", "u32", "u64"};
854 for (i
= 0; i
< ARRAY_SIZE (cast_rw
); ++i
)
855 if (!strcmp (token
, cast_rw
[i
]))
856 return BPF_CAST_U8
+ i
;
861 static enum bpf_token_type
862 get_token (const char **insn
, char *token
, size_t *tlen
)
867 : *(unsigned char *)(str++))
869 #define UNGET() (--str)
871 #define START_EXPR() \
878 #define SCANNER_SKIP_WHITESPACE() \
884 && ((ch) == ' ' || (ch) == '\t')); \
889 const char *str
= *insn
;
891 enum bpf_token_type ttype
= BPF_UNKNOWN
;
893 const char *expr
= NULL
;
894 const char *end_expr
= NULL
;
896 int return_token
= 0;
902 if (ch
== EOF
|| len
> MAX_TOKEN_SZ
)
905 switch (pseudoc_lex
[(unsigned char) ch
])
907 case LEX_IS_WHITESPACE
:
908 SCANNER_SKIP_WHITESPACE ();
933 case 14: /* s> ' ' */
951 case LEX_IS_ARITHM_OP
:
954 /* ='-' is handled as '=' */
965 #define BPF_ARITHM_OP(op, type) \
971 BPF_ARITHM_OP('+', BPF_ADD
);
972 BPF_ARITHM_OP('-', BPF_SUB
);
973 BPF_ARITHM_OP('*', BPF_MUL
);
974 BPF_ARITHM_OP('/', BPF_DIV
);
975 BPF_ARITHM_OP('|', BPF_OR
);
976 BPF_ARITHM_OP('%', BPF_MOD
);
977 BPF_ARITHM_OP('^', BPF_XOR
);
980 state
= 20; /* '&' */
987 state
= 18; /* '<' */
991 state
= 19; /* <'<' */
995 state
= 22; /* s'<' */
1004 state
= 12; /* '>' */
1008 state
= 13; /* >'>' */
1012 state
= 14; /* s'>' */
1016 state
= 15; /* s>'>' */
1029 state
= 2; /* '*', It could be the fist cast char. */
1032 case 16: /* ='*' Not valid token. */
1038 case 4: /* *(uXX'*' */
1045 case LEX_IS_OPEN_BR
:
1053 state
= 3; /* *'(' second char of a cast or expr. */
1058 if (valid_expr (expr
, &end_expr
))
1060 len
= end_expr
- expr
;
1061 memcpy (token
, expr
, len
);
1068 while (*invalid_expression
)
1069 token
[len
++] = *invalid_expression
++;
1072 ttype
= BPF_UNKNOWN
;
1077 ttype
= BPF_CHR_OPEN_BR
;
1078 SCANNER_SKIP_WHITESPACE ();
1081 if ((isdigit (ch2
) || ch2
== '(')
1082 && valid_expr (expr
, &end_expr
))
1084 len
= end_expr
- expr
;
1085 memcpy (token
, expr
, len
);
1094 case LEX_IS_CLSE_BR
:
1099 ttype
= BPF_CHR_CLSE_BR
;
1102 else if (state
== 5) /* *(uXX*')' */
1113 state
= 16; /* '=' */
1118 state
= 17; /* ='=' */
1126 case 10: /* s>>'=' */
1134 case 13: /* >>'=' */
1138 case 14: /* s>'=' */
1142 case 15: /* s>>'=' */
1150 case 19: /* <<'=' */
1162 case 22: /* s<'=' */
1168 case LEX_IS_SYMBOL_COMPONENT
:
1173 case 17: /* =='sym' */
1177 case 12: /* >'sym' */
1181 case 18: /* <'sym' */
1185 case 20: /* &'sym' */
1189 case 14: /*s>'sym' */
1193 case 22: /* s<'sym' */
1197 case 16: /* ='sym' */
1214 while ((ch2
= GET ()) != EOF
)
1218 type
= pseudoc_lex
[(unsigned char) ch2
];
1219 if (type
!= LEX_IS_SYMBOL_COMPONENT
)
1229 if (len
== 1 && ch
== 's')
1230 state
= 8; /* signed instructions: 's' */
1234 if (is_register (token
, len
))
1236 else if (look_for_reserved_word (token
, &ttype
))
1238 else if ((pseudoc_lex
[(unsigned char) *token
] == LEX_IS_ARITHM_OP
1239 || *token
== '(' || isdigit(*token
))
1240 && valid_expr (expr
, &end_expr
))
1242 len
= end_expr
- expr
;
1251 else if (state
== 3) /* *('sym' */
1253 if ((ttype
= is_cast (&token
[2])) != BPF_UNKNOWN
)
1254 state
= 4; /* *('uXX' */
1261 else if (state
== 6)
1263 if (ttype
== BPF_SUB
) /* neg */
1265 if (is_register (&token
[1], len
- 1))
1267 else if (valid_expr(expr
, &end_expr
))
1269 len
= end_expr
- expr
;
1270 memcpy(token
, expr
, len
);
1277 while (*invalid_expression
)
1278 token
[len
++] = *invalid_expression
++;
1280 ttype
= BPF_UNKNOWN
;
1283 else if (valid_expr (expr
, &end_expr
))
1285 len
= end_expr
- expr
;
1286 memcpy(token
, expr
, len
);
1291 ttype
= BPF_UNKNOWN
;
1311 #undef SCANNER_SKIP_WHITESPACE
1312 #undef BPF_ARITHM_OP
1316 The parser represents an FSM for the grammar described above. So for example
1319 ` bpf_alu_insn : BPF_REG bpf_alu_operator register_or_imm32'
1321 Is parsed as follows:
1323 1. It starts in state 0.
1325 2. Consumes the next token, e.g. `BPF_REG', and sets the `state' variable to a
1326 particular state that helps to identify, in this case, that a register
1327 token has been read.  A comment with the just-read pseudo-c token in single
1328 quotes is added along with the new `state' value to indicate
1329 what the scanner has read, e.g.:
1331 state = 6; // dst_reg = str_cast ( 'src_reg'
1333 So, in `state 6' the scanner has consumed: a destination register
1334 (BPF_REG), an equal character (BPF_MOV), a cast token (BPF_CAST), an
1335 open parenthesis (BPF_CHR_OPEN_BR) and the source register (BPF_REG).
1337 3. If the accumulated tokens represent a complete BPF pseudo-c syntax
1338 instruction, then the terms are validated, for example: whether
1339 the registers have the same size (32/64 bits), whether a specific
1340 destination register must be used, etc.  After that, a builder,
1341 build_bpf_{non_generic_load,atomic_insn,jmp_insn,arithm_insn,endianness,load_store_insn},
1342 is invoked.  Internally, it translates the BPF pseudo-c instruction to
1343 a BPF GAS instruction using the terms previously collected by the
1346 4. If the BPF GAS instruction was built successfully, the final
1347 state is set to `ST_EOI' (End Of Instruction), meaning that no more
1348 tokens are expected for that instruction.  Otherwise, if the
1349 conditions for calling a builder are not satisfied, an error is emitted
1350 and `parse_err' is set.
1354 bpf_pseudoc_to_normal_syntax (const char *str
, char **errmsg
)
1356 #define syntax_err(format, ...) \
1362 errbuf = xasprintf (format, ##__VA_ARGS__); \
1366 enum bpf_token_type ttype
;
1367 enum bpf_token_type bpf_endianness
= BPF_UNKNOWN
,
1369 enum bpf_token_type bpf_jmp_op
= BPF_JEQ
; /* Arbitrary. */
1370 enum bpf_token_type bpf_cast
= BPF_CAST_U8
; /* Arbitrary. */
1371 enum bpf_token_type bpf_arithm_op
= BPF_ADD
; /* Arbitrary. */
1372 char *bpf_insn
= NULL
;
1373 char *errbuf
= NULL
;
1374 char src_reg
[3] = {0};
1375 char dst_reg
[3] = {0};
1376 char str_imm32
[40] = {0};
1377 char str_offset
[40] = {0};
1378 char str_symbol
[MAX_TOKEN_SZ
] = {0};
1379 char token
[MAX_TOKEN_SZ
] = {0};
1386 ttype
= get_token (&str
, token
, &tlen
);
1387 if (ttype
== BPF_UNKNOWN
|| state
== ST_EOI
)
1389 syntax_err ("unexpected token: '%s'", token
);
1403 memcpy (dst_reg
, token
, tlen
);
1404 state
= 1; /* 'dst_reg' */
1408 /* dst_reg bpf_op 'src_reg' */
1409 memcpy (src_reg
, token
, tlen
);
1410 if (*dst_reg
== *src_reg
)
1411 bpf_insn
= build_bpf_arithm_insn (dst_reg
, src_reg
, 0,
1412 NULL
, bpf_arithm_op
);
1415 syntax_err ("different register sizes: '%s', '%s'",
1423 memcpy (src_reg
, token
, tlen
);
1424 state
= 6; /* dst_reg = str_cast ( 'src_reg' */
1428 memcpy (dst_reg
, token
, tlen
);
1429 state
= 10; /* str_cast ( 'dst_reg' */
1433 /* str_cast ( dst_reg offset ) = 'src_reg' */
1434 memcpy (src_reg
, token
, tlen
);
1435 bpf_insn
= build_bpf_load_store_insn (dst_reg
, src_reg
,
1436 bpf_cast
, str_offset
, 0);
1441 memcpy (dst_reg
, token
, tlen
);
1442 state
= 15; /* if 'dst_reg' */
1446 memcpy (src_reg
, token
, tlen
);
1447 state
= 17; /* if dst_reg jmp_op 'src_reg' */
1451 /* dst_reg = endianness src_reg */
1452 memcpy (src_reg
, token
, tlen
);
1453 if (*dst_reg
== 'r' && !strcmp (dst_reg
, src_reg
))
1454 bpf_insn
= build_bpf_endianness (dst_reg
, bpf_endianness
);
1456 syntax_err ("invalid operand for instruction: '%s'", token
);
1462 memcpy (dst_reg
, token
, tlen
);
1463 state
= 29; /* lock str_cast ( 'dst_reg' */
1468 /* lock str_cast ( dst_reg offset ) atomic_insn 'src_reg' */
1469 int with_offset
= *str_offset
!= '\0';
1471 memcpy (src_reg
, token
, tlen
);
1472 if ((bpf_cast
!= BPF_CAST_U32
1473 && bpf_cast
!= BPF_CAST_U64
)
1476 syntax_err ("invalid wide atomic instruction");
1478 bpf_insn
= build_bpf_atomic_insn (dst_reg
, src_reg
, bpf_atomic_insn
,
1479 bpf_cast
, with_offset
? str_offset
: str_symbol
);
1486 /* callx 'dst_reg' */
1487 bpf_insn
= xasprintf ("%s %%%s", "call", token
);
1492 memcpy (src_reg
, token
, tlen
);
1493 state
= 36; /* dst_reg = str_cast skb [ 'src_reg' */
1514 state
= 3; /* dst_reg 'arith_op' */
1515 bpf_arithm_op
= ttype
;
1519 if (ttype
== BPF_NEG
)
1522 bpf_arithm_op
= ttype
;
1523 memcpy (src_reg
, token
+ 1, tlen
- 1);
1524 if (strcmp (dst_reg
, src_reg
))
1526 syntax_err ("found: '%s', expected: -%s", token
, dst_reg
);
1530 bpf_insn
= build_bpf_arithm_insn (dst_reg
, src_reg
, 0,
1531 NULL
, bpf_arithm_op
);
1537 memcpy (src_reg
, token
, tlen
);
1538 state
= 11; /* str_cast ( dst_reg offset ) '=' */
1542 if (ttype
== BPF_MOV
)
1543 state
= 13; /* str_cast ( dst_reg offset ) '=' */
1547 bpf_atomic_insn
= ttype
;
1548 state
= 32; /* lock str_cast ( dst_reg offset ) 'atomic_insn' */
1552 syntax_err ("unexpected '%s'", token
);
1565 state
= 4; /* dst_reg = 'str_cast' */
1569 state
= 8; /* 'str_cast' */
1573 state
= 27; /* lock 'str_cast' */
1578 case BPF_CHR_OPEN_BR
:
1582 state
= 5; /* dst_reg = str_cast '(' */
1586 state
= 9; /* str_cast '(' */
1590 state
= 28; /* lock str_cast '(' */
1594 state
= 35; /* dst_reg = str_cast skb '[' */
1599 case BPF_CHR_CLSE_BR
:
1603 /* dst_reg = str_cast ( imm32 ')' */
1604 bpf_insn
= build_bpf_load_store_insn (dst_reg
, src_reg
,
1605 bpf_cast
, str_imm32
, 1);
1610 state
= 12; /* str_cast ( dst_reg imm32 ')' */
1614 /* dst_reg = str_cast ( src_reg offset ')' */
1615 bpf_insn
= build_bpf_load_store_insn (dst_reg
, src_reg
,
1616 bpf_cast
, str_offset
, 1);
1621 state
= 23; /* str_cast ( dst_reg offset ')' */
1625 state
= 31; /* lock str_cast ( dst_reg offset ')' */
1629 /* dst_reg = str_cast skb [ src_reg imm32 ']' */
1630 if (*dst_reg
!= 'w' && !strcmp ("r0", dst_reg
))
1631 bpf_insn
= build_bpf_non_generic_load (*src_reg
!= '\0' ? src_reg
: NULL
,
1632 bpf_cast
, str_imm32
);
1634 syntax_err ("invalid register operand: '%s'", dst_reg
);
1646 /* dst_reg bpf_arithm_op 'imm32' */
1649 memcpy (str_imm32
, token
, tlen
);
1650 memset (token
, 0, tlen
);
1652 if ((ttype
= get_token (&str
, token
, &tlen
)) == BPF_LL
1653 && bpf_arithm_op
== BPF_MOV
)
1655 else if (ttype
!= BPF_UNKNOWN
)
1656 syntax_err ("unexpected token: '%s'", token
);
1658 if (load64
&& *dst_reg
== 'w')
1659 syntax_err ("unexpected register size: '%s'", dst_reg
);
1662 bpf_insn
= build_bpf_arithm_insn (dst_reg
, NULL
, load64
,
1663 str_imm32
, bpf_arithm_op
);
1670 /* if dst_reg jmp_op src_reg goto 'offset' */
1671 int with_src
= *src_reg
!= '\0';
1673 memcpy (str_offset
, token
, tlen
);
1674 if (with_src
&& *dst_reg
!= *src_reg
)
1675 syntax_err ("different register size: '%s', '%s'",
1678 bpf_insn
= build_bpf_jmp_insn (dst_reg
, with_src
? src_reg
: NULL
,
1679 with_src
? NULL
: str_imm32
,
1680 bpf_jmp_op
, NULL
, str_offset
);
1687 memcpy (str_offset
, token
, tlen
);
1688 bpf_insn
= xasprintf ("%s %s", "ja", str_offset
);
1693 memcpy (str_offset
, token
, tlen
);
1694 state
= 21; /* dst_reg = str_cast ( src_reg 'offset' */
1698 memcpy (str_offset
, token
, tlen
);
1699 state
= 22; /* str_cast ( dst_reg 'offset' */
1703 memcpy (str_imm32
, token
, tlen
);
1704 state
= 25; /* if dst_reg jmp_op 'imm32' */
1708 memcpy (str_offset
, token
, tlen
);
1709 state
= 30; /* lock str_cast ( dst_reg 'offset' */
1713 /* dst_reg = str_cast skb 'imm32' */
1714 if (*dst_reg
!= 'w' && !strcmp ("r0", dst_reg
))
1716 memcpy (str_imm32
, token
, tlen
);
1717 bpf_insn
= build_bpf_non_generic_load (*src_reg
!= '\0' ? src_reg
: NULL
,
1718 bpf_cast
, str_imm32
);
1721 syntax_err ("invalid register operand: '%s'", dst_reg
);
1727 memcpy (str_imm32
, token
, tlen
);
1728 state
= 37; /* dst_reg = str_cast skb [ src_reg 'imm32' */
1752 state
= 16; /* if dst_reg 'jmp_op' */
1761 state
= 18; /* if dst_reg jmp_op src_reg|imm32 'goto' */
1775 /* if dst_reg jmp_op src_reg goto 'sym' */
1776 int with_src
= *src_reg
!= '\0';
1778 memcpy (str_symbol
, token
, tlen
);
1779 if (with_src
&& *dst_reg
!= *src_reg
)
1780 syntax_err ("different register size: '%s', '%s'",
1783 bpf_insn
= build_bpf_jmp_insn (dst_reg
, with_src
? src_reg
: NULL
,
1784 with_src
? NULL
: str_imm32
,
1785 bpf_jmp_op
, str_symbol
, NULL
);
1792 memcpy (str_symbol
, token
, tlen
);
1793 bpf_insn
= xasprintf ("%s %s", "ja", str_symbol
);
1803 /* dst_reg arithm_op 'sym' */
1806 memcpy (str_symbol
, token
, tlen
);
1807 memset (token
, 0, tlen
);
1809 if ((ttype
= get_token (&str
, token
, &tlen
)) == BPF_LL
1810 && bpf_arithm_op
== BPF_MOV
)
1812 else if (ttype
!= BPF_UNKNOWN
)
1813 syntax_err ("unexpected token: '%s'", token
);
1815 if (load64
&& *dst_reg
== 'w')
1816 syntax_err ("unexpected register size: '%s'", dst_reg
);
1819 bpf_insn
= build_bpf_arithm_insn (dst_reg
, NULL
, load64
,
1820 str_symbol
, bpf_arithm_op
);
1833 bpf_endianness
= ttype
;
1834 state
= 24; /* dst_reg = 'endianness' */
1846 state
= 34; /* dst_reg = str_cast 'skb' */
1850 memset (token
, 0, tlen
);
1853 if (state
!= ST_EOI
)
1854 syntax_err ("incomplete instruction");
1863 md_assemble (char *str
)
1865 const CGEN_INSN
*insn
;
1872 CGEN_INSN_INT buffer
[CGEN_MAX_INSN_SIZE
/ sizeof (CGEN_INT_INSN_P
)];
1874 unsigned char buffer
[CGEN_MAX_INSN_SIZE
];
1877 gas_cgen_init_parse ();
1878 insn
= bpf_cgen_assemble_insn (gas_cgen_cpu_desc
, str
, &fields
,
1882 normal
= bpf_pseudoc_to_normal_syntax (str
, &a_errmsg
);
1885 insn
= bpf_cgen_assemble_insn (gas_cgen_cpu_desc
, normal
, &fields
,
1892 as_bad ("%s", errmsg
);
1895 as_bad ("%s", a_errmsg
);
1902 gas_cgen_finish_insn (insn
, buffer
, CGEN_FIELDS_BITSIZE (&fields
),
1903 0, /* zero to ban relaxable insns. */
1904 NULL
); /* NULL so results not returned here. */
1908 md_operand (expressionS
*expressionP
)
1910 invalid_expression
= input_line_pointer
- 1;
1911 gas_cgen_md_operand (expressionP
);
1916 md_undefined_symbol (char *name ATTRIBUTE_UNUSED
)
1922 /* Turn a string in input_line_pointer into a floating point constant
1923 of type TYPE, and store the appropriate bytes in *LITP. The number
1924 of LITTLENUMS emitted is stored in *SIZEP. An error message is
1925 returned, or NULL on OK. */
1928 md_atof (int type
, char *litP
, int *sizeP
)
1930 return ieee_md_atof (type
, litP
, sizeP
, false);