2 * Copyright (C) 2016-2017 Netronome Systems, Inc.
4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this
6 * source tree or the BSD 2-Clause License provided below. You have the
7 * option to license this software under the complete terms of either license.
9 * The BSD 2-Clause License:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * 1. Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * 2. Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #define pr_fmt(fmt) "NFP net bpf: " fmt
36 #include <linux/bug.h>
37 #include <linux/kernel.h>
38 #include <linux/bpf.h>
39 #include <linux/filter.h>
40 #include <linux/pkt_cls.h>
41 #include <linux/unistd.h>
44 #include "../nfp_asm.h"
46 /* --- NFP prog --- */
47 /* Foreach "multiple" entries macros provide pos and next<n> pointers.
48 * It's safe to modify the next pointers (but not pos).
/* Walk the instruction list with a look-ahead of one entry; terminates as
 * soon as either @pos or @next would be the list head.
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))
/* Walk the instruction list with a look-ahead of two entries; terminates as
 * soon as @pos, @next or @next2 would be the list head.
 */
#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l &&				\
	     &(nfp_prog)->insns != &next2->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))
70 nfp_meta_has_prev(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
72 return meta
->l
.prev
!= &nfp_prog
->insns
;
75 static void nfp_prog_push(struct nfp_prog
*nfp_prog
, u64 insn
)
77 if (nfp_prog
->__prog_alloc_len
/ sizeof(u64
) == nfp_prog
->prog_len
) {
78 pr_warn("instruction limit reached (%u NFP instructions)\n",
80 nfp_prog
->error
= -ENOSPC
;
84 nfp_prog
->prog
[nfp_prog
->prog_len
] = insn
;
88 static unsigned int nfp_prog_current_offset(struct nfp_prog
*nfp_prog
)
90 return nfp_prog
->prog_len
;
94 nfp_prog_confirm_current_offset(struct nfp_prog
*nfp_prog
, unsigned int off
)
96 /* If there is a recorded error we may have dropped instructions;
97 * that doesn't have to be due to translator bug, and the translation
98 * will fail anyway, so just return OK.
102 return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog
) != off
);
105 /* --- Emitters --- */
107 __emit_cmd(struct nfp_prog
*nfp_prog
, enum cmd_tgt_map op
,
108 u8 mode
, u8 xfer
, u8 areg
, u8 breg
, u8 size
, bool sync
, bool indir
)
110 enum cmd_ctx_swap ctx
;
116 ctx
= CMD_CTX_NO_SWAP
;
118 insn
= FIELD_PREP(OP_CMD_A_SRC
, areg
) |
119 FIELD_PREP(OP_CMD_CTX
, ctx
) |
120 FIELD_PREP(OP_CMD_B_SRC
, breg
) |
121 FIELD_PREP(OP_CMD_TOKEN
, cmd_tgt_act
[op
].token
) |
122 FIELD_PREP(OP_CMD_XFER
, xfer
) |
123 FIELD_PREP(OP_CMD_CNT
, size
) |
124 FIELD_PREP(OP_CMD_SIG
, sync
) |
125 FIELD_PREP(OP_CMD_TGT_CMD
, cmd_tgt_act
[op
].tgt_cmd
) |
126 FIELD_PREP(OP_CMD_INDIR
, indir
) |
127 FIELD_PREP(OP_CMD_MODE
, mode
);
129 nfp_prog_push(nfp_prog
, insn
);
133 emit_cmd_any(struct nfp_prog
*nfp_prog
, enum cmd_tgt_map op
, u8 mode
, u8 xfer
,
134 swreg lreg
, swreg rreg
, u8 size
, bool sync
, bool indir
)
136 struct nfp_insn_re_regs reg
;
139 err
= swreg_to_restricted(reg_none(), lreg
, rreg
, ®
, false);
141 nfp_prog
->error
= err
;
145 pr_err("cmd can't swap arguments\n");
146 nfp_prog
->error
= -EFAULT
;
149 if (reg
.dst_lmextn
|| reg
.src_lmextn
) {
150 pr_err("cmd can't use LMextn\n");
151 nfp_prog
->error
= -EFAULT
;
155 __emit_cmd(nfp_prog
, op
, mode
, xfer
, reg
.areg
, reg
.breg
, size
, sync
,
160 emit_cmd(struct nfp_prog
*nfp_prog
, enum cmd_tgt_map op
, u8 mode
, u8 xfer
,
161 swreg lreg
, swreg rreg
, u8 size
, bool sync
)
163 emit_cmd_any(nfp_prog
, op
, mode
, xfer
, lreg
, rreg
, size
, sync
, false);
167 emit_cmd_indir(struct nfp_prog
*nfp_prog
, enum cmd_tgt_map op
, u8 mode
, u8 xfer
,
168 swreg lreg
, swreg rreg
, u8 size
, bool sync
)
170 emit_cmd_any(nfp_prog
, op
, mode
, xfer
, lreg
, rreg
, size
, sync
, true);
174 __emit_br(struct nfp_prog
*nfp_prog
, enum br_mask mask
, enum br_ev_pip ev_pip
,
175 enum br_ctx_signal_state css
, u16 addr
, u8 defer
)
177 u16 addr_lo
, addr_hi
;
180 addr_lo
= addr
& (OP_BR_ADDR_LO
>> __bf_shf(OP_BR_ADDR_LO
));
181 addr_hi
= addr
!= addr_lo
;
184 FIELD_PREP(OP_BR_MASK
, mask
) |
185 FIELD_PREP(OP_BR_EV_PIP
, ev_pip
) |
186 FIELD_PREP(OP_BR_CSS
, css
) |
187 FIELD_PREP(OP_BR_DEFBR
, defer
) |
188 FIELD_PREP(OP_BR_ADDR_LO
, addr_lo
) |
189 FIELD_PREP(OP_BR_ADDR_HI
, addr_hi
);
191 nfp_prog_push(nfp_prog
, insn
);
195 emit_br_relo(struct nfp_prog
*nfp_prog
, enum br_mask mask
, u16 addr
, u8 defer
,
196 enum nfp_relo_type relo
)
198 if (mask
== BR_UNC
&& defer
> 2) {
199 pr_err("BUG: branch defer out of bounds %d\n", defer
);
200 nfp_prog
->error
= -EFAULT
;
204 __emit_br(nfp_prog
, mask
,
205 mask
!= BR_UNC
? BR_EV_PIP_COND
: BR_EV_PIP_UNCOND
,
206 BR_CSS_NONE
, addr
, defer
);
208 nfp_prog
->prog
[nfp_prog
->prog_len
- 1] |=
209 FIELD_PREP(OP_RELO_TYPE
, relo
);
213 emit_br(struct nfp_prog
*nfp_prog
, enum br_mask mask
, u16 addr
, u8 defer
)
215 emit_br_relo(nfp_prog
, mask
, addr
, defer
, RELO_BR_REL
);
219 __emit_immed(struct nfp_prog
*nfp_prog
, u16 areg
, u16 breg
, u16 imm_hi
,
220 enum immed_width width
, bool invert
,
221 enum immed_shift shift
, bool wr_both
,
222 bool dst_lmextn
, bool src_lmextn
)
226 insn
= OP_IMMED_BASE
|
227 FIELD_PREP(OP_IMMED_A_SRC
, areg
) |
228 FIELD_PREP(OP_IMMED_B_SRC
, breg
) |
229 FIELD_PREP(OP_IMMED_IMM
, imm_hi
) |
230 FIELD_PREP(OP_IMMED_WIDTH
, width
) |
231 FIELD_PREP(OP_IMMED_INV
, invert
) |
232 FIELD_PREP(OP_IMMED_SHIFT
, shift
) |
233 FIELD_PREP(OP_IMMED_WR_AB
, wr_both
) |
234 FIELD_PREP(OP_IMMED_SRC_LMEXTN
, src_lmextn
) |
235 FIELD_PREP(OP_IMMED_DST_LMEXTN
, dst_lmextn
);
237 nfp_prog_push(nfp_prog
, insn
);
241 emit_immed(struct nfp_prog
*nfp_prog
, swreg dst
, u16 imm
,
242 enum immed_width width
, bool invert
, enum immed_shift shift
)
244 struct nfp_insn_ur_regs reg
;
247 if (swreg_type(dst
) == NN_REG_IMM
) {
248 nfp_prog
->error
= -EFAULT
;
252 err
= swreg_to_unrestricted(dst
, dst
, reg_imm(imm
& 0xff), ®
);
254 nfp_prog
->error
= err
;
258 /* Use reg.dst when destination is No-Dest. */
259 __emit_immed(nfp_prog
,
260 swreg_type(dst
) == NN_REG_NONE
? reg
.dst
: reg
.areg
,
261 reg
.breg
, imm
>> 8, width
, invert
, shift
,
262 reg
.wr_both
, reg
.dst_lmextn
, reg
.src_lmextn
);
266 __emit_shf(struct nfp_prog
*nfp_prog
, u16 dst
, enum alu_dst_ab dst_ab
,
267 enum shf_sc sc
, u8 shift
,
268 u16 areg
, enum shf_op op
, u16 breg
, bool i8
, bool sw
, bool wr_both
,
269 bool dst_lmextn
, bool src_lmextn
)
273 if (!FIELD_FIT(OP_SHF_SHIFT
, shift
)) {
274 nfp_prog
->error
= -EFAULT
;
278 if (sc
== SHF_SC_L_SHF
)
282 FIELD_PREP(OP_SHF_A_SRC
, areg
) |
283 FIELD_PREP(OP_SHF_SC
, sc
) |
284 FIELD_PREP(OP_SHF_B_SRC
, breg
) |
285 FIELD_PREP(OP_SHF_I8
, i8
) |
286 FIELD_PREP(OP_SHF_SW
, sw
) |
287 FIELD_PREP(OP_SHF_DST
, dst
) |
288 FIELD_PREP(OP_SHF_SHIFT
, shift
) |
289 FIELD_PREP(OP_SHF_OP
, op
) |
290 FIELD_PREP(OP_SHF_DST_AB
, dst_ab
) |
291 FIELD_PREP(OP_SHF_WR_AB
, wr_both
) |
292 FIELD_PREP(OP_SHF_SRC_LMEXTN
, src_lmextn
) |
293 FIELD_PREP(OP_SHF_DST_LMEXTN
, dst_lmextn
);
295 nfp_prog_push(nfp_prog
, insn
);
299 emit_shf(struct nfp_prog
*nfp_prog
, swreg dst
,
300 swreg lreg
, enum shf_op op
, swreg rreg
, enum shf_sc sc
, u8 shift
)
302 struct nfp_insn_re_regs reg
;
305 err
= swreg_to_restricted(dst
, lreg
, rreg
, ®
, true);
307 nfp_prog
->error
= err
;
311 __emit_shf(nfp_prog
, reg
.dst
, reg
.dst_ab
, sc
, shift
,
312 reg
.areg
, op
, reg
.breg
, reg
.i8
, reg
.swap
, reg
.wr_both
,
313 reg
.dst_lmextn
, reg
.src_lmextn
);
317 __emit_alu(struct nfp_prog
*nfp_prog
, u16 dst
, enum alu_dst_ab dst_ab
,
318 u16 areg
, enum alu_op op
, u16 breg
, bool swap
, bool wr_both
,
319 bool dst_lmextn
, bool src_lmextn
)
324 FIELD_PREP(OP_ALU_A_SRC
, areg
) |
325 FIELD_PREP(OP_ALU_B_SRC
, breg
) |
326 FIELD_PREP(OP_ALU_DST
, dst
) |
327 FIELD_PREP(OP_ALU_SW
, swap
) |
328 FIELD_PREP(OP_ALU_OP
, op
) |
329 FIELD_PREP(OP_ALU_DST_AB
, dst_ab
) |
330 FIELD_PREP(OP_ALU_WR_AB
, wr_both
) |
331 FIELD_PREP(OP_ALU_SRC_LMEXTN
, src_lmextn
) |
332 FIELD_PREP(OP_ALU_DST_LMEXTN
, dst_lmextn
);
334 nfp_prog_push(nfp_prog
, insn
);
338 emit_alu(struct nfp_prog
*nfp_prog
, swreg dst
,
339 swreg lreg
, enum alu_op op
, swreg rreg
)
341 struct nfp_insn_ur_regs reg
;
344 err
= swreg_to_unrestricted(dst
, lreg
, rreg
, ®
);
346 nfp_prog
->error
= err
;
350 __emit_alu(nfp_prog
, reg
.dst
, reg
.dst_ab
,
351 reg
.areg
, op
, reg
.breg
, reg
.swap
, reg
.wr_both
,
352 reg
.dst_lmextn
, reg
.src_lmextn
);
356 __emit_ld_field(struct nfp_prog
*nfp_prog
, enum shf_sc sc
,
357 u8 areg
, u8 bmask
, u8 breg
, u8 shift
, bool imm8
,
358 bool zero
, bool swap
, bool wr_both
,
359 bool dst_lmextn
, bool src_lmextn
)
364 FIELD_PREP(OP_LDF_A_SRC
, areg
) |
365 FIELD_PREP(OP_LDF_SC
, sc
) |
366 FIELD_PREP(OP_LDF_B_SRC
, breg
) |
367 FIELD_PREP(OP_LDF_I8
, imm8
) |
368 FIELD_PREP(OP_LDF_SW
, swap
) |
369 FIELD_PREP(OP_LDF_ZF
, zero
) |
370 FIELD_PREP(OP_LDF_BMASK
, bmask
) |
371 FIELD_PREP(OP_LDF_SHF
, shift
) |
372 FIELD_PREP(OP_LDF_WR_AB
, wr_both
) |
373 FIELD_PREP(OP_LDF_SRC_LMEXTN
, src_lmextn
) |
374 FIELD_PREP(OP_LDF_DST_LMEXTN
, dst_lmextn
);
376 nfp_prog_push(nfp_prog
, insn
);
380 emit_ld_field_any(struct nfp_prog
*nfp_prog
, swreg dst
, u8 bmask
, swreg src
,
381 enum shf_sc sc
, u8 shift
, bool zero
)
383 struct nfp_insn_re_regs reg
;
386 /* Note: ld_field is special as it uses one of the src regs as dst */
387 err
= swreg_to_restricted(dst
, dst
, src
, ®
, true);
389 nfp_prog
->error
= err
;
393 __emit_ld_field(nfp_prog
, sc
, reg
.areg
, bmask
, reg
.breg
, shift
,
394 reg
.i8
, zero
, reg
.swap
, reg
.wr_both
,
395 reg
.dst_lmextn
, reg
.src_lmextn
);
399 emit_ld_field(struct nfp_prog
*nfp_prog
, swreg dst
, u8 bmask
, swreg src
,
400 enum shf_sc sc
, u8 shift
)
402 emit_ld_field_any(nfp_prog
, dst
, bmask
, src
, sc
, shift
, false);
406 __emit_lcsr(struct nfp_prog
*nfp_prog
, u16 areg
, u16 breg
, bool wr
, u16 addr
,
407 bool dst_lmextn
, bool src_lmextn
)
411 insn
= OP_LCSR_BASE
|
412 FIELD_PREP(OP_LCSR_A_SRC
, areg
) |
413 FIELD_PREP(OP_LCSR_B_SRC
, breg
) |
414 FIELD_PREP(OP_LCSR_WRITE
, wr
) |
415 FIELD_PREP(OP_LCSR_ADDR
, addr
) |
416 FIELD_PREP(OP_LCSR_SRC_LMEXTN
, src_lmextn
) |
417 FIELD_PREP(OP_LCSR_DST_LMEXTN
, dst_lmextn
);
419 nfp_prog_push(nfp_prog
, insn
);
422 static void emit_csr_wr(struct nfp_prog
*nfp_prog
, swreg src
, u16 addr
)
424 struct nfp_insn_ur_regs reg
;
427 /* This instruction takes immeds instead of reg_none() for the ignored
428 * operand, but we can't encode 2 immeds in one instr with our normal
429 * swreg infra so if param is an immed, we encode as reg_none() and
430 * copy the immed to both operands.
432 if (swreg_type(src
) == NN_REG_IMM
) {
433 err
= swreg_to_unrestricted(reg_none(), src
, reg_none(), ®
);
436 err
= swreg_to_unrestricted(reg_none(), src
, reg_imm(0), ®
);
439 nfp_prog
->error
= err
;
443 __emit_lcsr(nfp_prog
, reg
.areg
, reg
.breg
, true, addr
/ 4,
444 false, reg
.src_lmextn
);
447 static void emit_nop(struct nfp_prog
*nfp_prog
)
449 __emit_immed(nfp_prog
, UR_REG_IMM
, UR_REG_IMM
, 0, 0, 0, 0, 0, 0, 0);
452 /* --- Wrappers --- */
453 static bool pack_immed(u32 imm
, u16
*val
, enum immed_shift
*shift
)
455 if (!(imm
& 0xffff0000)) {
457 *shift
= IMMED_SHIFT_0B
;
458 } else if (!(imm
& 0xff0000ff)) {
460 *shift
= IMMED_SHIFT_1B
;
461 } else if (!(imm
& 0x0000ffff)) {
463 *shift
= IMMED_SHIFT_2B
;
471 static void wrp_immed(struct nfp_prog
*nfp_prog
, swreg dst
, u32 imm
)
473 enum immed_shift shift
;
476 if (pack_immed(imm
, &val
, &shift
)) {
477 emit_immed(nfp_prog
, dst
, val
, IMMED_WIDTH_ALL
, false, shift
);
478 } else if (pack_immed(~imm
, &val
, &shift
)) {
479 emit_immed(nfp_prog
, dst
, val
, IMMED_WIDTH_ALL
, true, shift
);
481 emit_immed(nfp_prog
, dst
, imm
& 0xffff, IMMED_WIDTH_ALL
,
482 false, IMMED_SHIFT_0B
);
483 emit_immed(nfp_prog
, dst
, imm
>> 16, IMMED_WIDTH_WORD
,
484 false, IMMED_SHIFT_2B
);
489 wrp_immed_relo(struct nfp_prog
*nfp_prog
, swreg dst
, u32 imm
,
490 enum nfp_relo_type relo
)
493 pr_err("relocation of a large immediate!\n");
494 nfp_prog
->error
= -EFAULT
;
497 emit_immed(nfp_prog
, dst
, imm
, IMMED_WIDTH_ALL
, false, IMMED_SHIFT_0B
);
499 nfp_prog
->prog
[nfp_prog
->prog_len
- 1] |=
500 FIELD_PREP(OP_RELO_TYPE
, relo
);
503 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
504 * If the @imm is small enough encode it directly in operand and return
505 * otherwise load @imm to a spare register and return its encoding.
507 static swreg
ur_load_imm_any(struct nfp_prog
*nfp_prog
, u32 imm
, swreg tmp_reg
)
509 if (FIELD_FIT(UR_REG_IMM_MAX
, imm
))
512 wrp_immed(nfp_prog
, tmp_reg
, imm
);
516 /* re_load_imm_any() - encode immediate or use tmp register (restricted)
517 * If the @imm is small enough encode it directly in operand and return
518 * otherwise load @imm to a spare register and return its encoding.
520 static swreg
re_load_imm_any(struct nfp_prog
*nfp_prog
, u32 imm
, swreg tmp_reg
)
522 if (FIELD_FIT(RE_REG_IMM_MAX
, imm
))
525 wrp_immed(nfp_prog
, tmp_reg
, imm
);
/* Emit @count no-op instructions. */
static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
{
	while (count--)
		emit_nop(nfp_prog);
}
535 static void wrp_mov(struct nfp_prog
*nfp_prog
, swreg dst
, swreg src
)
537 emit_alu(nfp_prog
, dst
, reg_none(), ALU_OP_NONE
, src
);
540 static void wrp_reg_mov(struct nfp_prog
*nfp_prog
, u16 dst
, u16 src
)
542 wrp_mov(nfp_prog
, reg_both(dst
), reg_b(src
));
545 /* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
546 * result to @dst from low end.
549 wrp_reg_subpart(struct nfp_prog
*nfp_prog
, swreg dst
, swreg src
, u8 field_len
,
552 enum shf_sc sc
= offset
? SHF_SC_R_SHF
: SHF_SC_NONE
;
553 u8 mask
= (1 << field_len
) - 1;
555 emit_ld_field_any(nfp_prog
, dst
, mask
, src
, sc
, offset
* 8, true);
559 addr40_offset(struct nfp_prog
*nfp_prog
, u8 src_gpr
, swreg offset
,
560 swreg
*rega
, swreg
*regb
)
562 if (offset
== reg_imm(0)) {
563 *rega
= reg_a(src_gpr
);
564 *regb
= reg_b(src_gpr
+ 1);
568 emit_alu(nfp_prog
, imm_a(nfp_prog
), reg_a(src_gpr
), ALU_OP_ADD
, offset
);
569 emit_alu(nfp_prog
, imm_b(nfp_prog
), reg_b(src_gpr
+ 1), ALU_OP_ADD_C
,
571 *rega
= imm_a(nfp_prog
);
572 *regb
= imm_b(nfp_prog
);
575 /* NFP has Command Push Pull bus which supports bluk memory operations. */
576 static int nfp_cpp_memcpy(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
578 bool descending_seq
= meta
->ldst_gather_len
< 0;
579 s16 len
= abs(meta
->ldst_gather_len
);
585 off
= re_load_imm_any(nfp_prog
, meta
->insn
.off
, imm_b(nfp_prog
));
586 src_40bit_addr
= meta
->ptr
.type
== PTR_TO_MAP_VALUE
;
587 src_base
= reg_a(meta
->insn
.src_reg
* 2);
588 xfer_num
= round_up(len
, 4) / 4;
591 addr40_offset(nfp_prog
, meta
->insn
.src_reg
, off
, &src_base
,
594 /* Setup PREV_ALU fields to override memory read length. */
596 wrp_immed(nfp_prog
, reg_none(),
597 CMD_OVE_LEN
| FIELD_PREP(CMD_OV_LEN
, xfer_num
- 1));
599 /* Memory read from source addr into transfer-in registers. */
600 emit_cmd_any(nfp_prog
, CMD_TGT_READ32_SWAP
,
601 src_40bit_addr
? CMD_MODE_40b_BA
: CMD_MODE_32b
, 0,
602 src_base
, off
, xfer_num
- 1, true, len
> 32);
604 /* Move from transfer-in to transfer-out. */
605 for (i
= 0; i
< xfer_num
; i
++)
606 wrp_mov(nfp_prog
, reg_xfer(i
), reg_xfer(i
));
608 off
= re_load_imm_any(nfp_prog
, meta
->paired_st
->off
, imm_b(nfp_prog
));
611 /* Use single direct_ref write8. */
612 emit_cmd(nfp_prog
, CMD_TGT_WRITE8_SWAP
, CMD_MODE_32b
, 0,
613 reg_a(meta
->paired_st
->dst_reg
* 2), off
, len
- 1,
615 } else if (len
<= 32 && IS_ALIGNED(len
, 4)) {
616 /* Use single direct_ref write32. */
617 emit_cmd(nfp_prog
, CMD_TGT_WRITE32_SWAP
, CMD_MODE_32b
, 0,
618 reg_a(meta
->paired_st
->dst_reg
* 2), off
, xfer_num
- 1,
620 } else if (len
<= 32) {
621 /* Use single indirect_ref write8. */
622 wrp_immed(nfp_prog
, reg_none(),
623 CMD_OVE_LEN
| FIELD_PREP(CMD_OV_LEN
, len
- 1));
624 emit_cmd_indir(nfp_prog
, CMD_TGT_WRITE8_SWAP
, CMD_MODE_32b
, 0,
625 reg_a(meta
->paired_st
->dst_reg
* 2), off
,
627 } else if (IS_ALIGNED(len
, 4)) {
628 /* Use single indirect_ref write32. */
629 wrp_immed(nfp_prog
, reg_none(),
630 CMD_OVE_LEN
| FIELD_PREP(CMD_OV_LEN
, xfer_num
- 1));
631 emit_cmd_indir(nfp_prog
, CMD_TGT_WRITE32_SWAP
, CMD_MODE_32b
, 0,
632 reg_a(meta
->paired_st
->dst_reg
* 2), off
,
634 } else if (len
<= 40) {
635 /* Use one direct_ref write32 to write the first 32-bytes, then
636 * another direct_ref write8 to write the remaining bytes.
638 emit_cmd(nfp_prog
, CMD_TGT_WRITE32_SWAP
, CMD_MODE_32b
, 0,
639 reg_a(meta
->paired_st
->dst_reg
* 2), off
, 7,
642 off
= re_load_imm_any(nfp_prog
, meta
->paired_st
->off
+ 32,
644 emit_cmd(nfp_prog
, CMD_TGT_WRITE8_SWAP
, CMD_MODE_32b
, 8,
645 reg_a(meta
->paired_st
->dst_reg
* 2), off
, len
- 33,
648 /* Use one indirect_ref write32 to write 4-bytes aligned length,
649 * then another direct_ref write8 to write the remaining bytes.
653 wrp_immed(nfp_prog
, reg_none(),
654 CMD_OVE_LEN
| FIELD_PREP(CMD_OV_LEN
, xfer_num
- 2));
655 emit_cmd_indir(nfp_prog
, CMD_TGT_WRITE32_SWAP
, CMD_MODE_32b
, 0,
656 reg_a(meta
->paired_st
->dst_reg
* 2), off
,
658 new_off
= meta
->paired_st
->off
+ (xfer_num
- 1) * 4;
659 off
= re_load_imm_any(nfp_prog
, new_off
, imm_b(nfp_prog
));
660 emit_cmd(nfp_prog
, CMD_TGT_WRITE8_SWAP
, CMD_MODE_32b
,
661 xfer_num
- 1, reg_a(meta
->paired_st
->dst_reg
* 2), off
,
662 (len
& 0x3) - 1, true);
665 /* TODO: The following extra load is to make sure data flow be identical
666 * before and after we do memory copy optimization.
668 * The load destination register is not guaranteed to be dead, so we
669 * need to make sure it is loaded with the value the same as before
670 * this transformation.
672 * These extra loads could be removed once we have accurate register
677 else if (BPF_SIZE(meta
->insn
.code
) != BPF_DW
)
678 xfer_num
= xfer_num
- 1;
680 xfer_num
= xfer_num
- 2;
682 switch (BPF_SIZE(meta
->insn
.code
)) {
684 wrp_reg_subpart(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2),
685 reg_xfer(xfer_num
), 1,
686 IS_ALIGNED(len
, 4) ? 3 : (len
& 3) - 1);
689 wrp_reg_subpart(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2),
690 reg_xfer(xfer_num
), 2, (len
& 3) ^ 2);
693 wrp_mov(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2),
697 wrp_mov(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2),
699 wrp_mov(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2 + 1),
700 reg_xfer(xfer_num
+ 1));
704 if (BPF_SIZE(meta
->insn
.code
) != BPF_DW
)
705 wrp_immed(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2 + 1), 0);
711 data_ld(struct nfp_prog
*nfp_prog
, swreg offset
, u8 dst_gpr
, int size
)
716 /* We load the value from the address indicated in @offset and then
717 * shift out the data we don't need. Note: this is big endian!
720 shift
= size
< 4 ? 4 - size
: 0;
722 emit_cmd(nfp_prog
, CMD_TGT_READ8
, CMD_MODE_32b
, 0,
723 pptr_reg(nfp_prog
), offset
, sz
- 1, true);
727 emit_shf(nfp_prog
, reg_both(dst_gpr
), reg_none(), SHF_OP_NONE
,
728 reg_xfer(0), SHF_SC_R_SHF
, shift
* 8);
730 for (; i
* 4 < size
; i
++)
731 wrp_mov(nfp_prog
, reg_both(dst_gpr
+ i
), reg_xfer(i
));
734 wrp_immed(nfp_prog
, reg_both(dst_gpr
+ 1), 0);
740 data_ld_host_order(struct nfp_prog
*nfp_prog
, u8 dst_gpr
,
741 swreg lreg
, swreg rreg
, int size
, enum cmd_mode mode
)
746 /* We load the value from the address indicated in rreg + lreg and then
747 * mask out the data we don't need. Note: this is little endian!
750 mask
= size
< 4 ? GENMASK(size
- 1, 0) : 0;
752 emit_cmd(nfp_prog
, CMD_TGT_READ32_SWAP
, mode
, 0,
753 lreg
, rreg
, sz
/ 4 - 1, true);
757 emit_ld_field_any(nfp_prog
, reg_both(dst_gpr
), mask
,
758 reg_xfer(0), SHF_SC_NONE
, 0, true);
760 for (; i
* 4 < size
; i
++)
761 wrp_mov(nfp_prog
, reg_both(dst_gpr
+ i
), reg_xfer(i
));
764 wrp_immed(nfp_prog
, reg_both(dst_gpr
+ 1), 0);
770 data_ld_host_order_addr32(struct nfp_prog
*nfp_prog
, u8 src_gpr
, swreg offset
,
773 return data_ld_host_order(nfp_prog
, dst_gpr
, reg_a(src_gpr
), offset
,
778 data_ld_host_order_addr40(struct nfp_prog
*nfp_prog
, u8 src_gpr
, swreg offset
,
783 addr40_offset(nfp_prog
, src_gpr
, offset
, ®a
, ®b
);
785 return data_ld_host_order(nfp_prog
, dst_gpr
, rega
, regb
,
786 size
, CMD_MODE_40b_BA
);
790 construct_data_ind_ld(struct nfp_prog
*nfp_prog
, u16 offset
, u16 src
, u8 size
)
794 /* Calculate the true offset (src_reg + imm) */
795 tmp_reg
= ur_load_imm_any(nfp_prog
, offset
, imm_b(nfp_prog
));
796 emit_alu(nfp_prog
, imm_both(nfp_prog
), reg_a(src
), ALU_OP_ADD
, tmp_reg
);
798 /* Check packet length (size guaranteed to fit b/c it's u8) */
799 emit_alu(nfp_prog
, imm_a(nfp_prog
),
800 imm_a(nfp_prog
), ALU_OP_ADD
, reg_imm(size
));
801 emit_alu(nfp_prog
, reg_none(),
802 plen_reg(nfp_prog
), ALU_OP_SUB
, imm_a(nfp_prog
));
803 emit_br_relo(nfp_prog
, BR_BLO
, BR_OFF_RELO
, 0, RELO_BR_GO_ABORT
);
806 return data_ld(nfp_prog
, imm_b(nfp_prog
), 0, size
);
809 static int construct_data_ld(struct nfp_prog
*nfp_prog
, u16 offset
, u8 size
)
813 /* Check packet length */
814 tmp_reg
= ur_load_imm_any(nfp_prog
, offset
+ size
, imm_a(nfp_prog
));
815 emit_alu(nfp_prog
, reg_none(), plen_reg(nfp_prog
), ALU_OP_SUB
, tmp_reg
);
816 emit_br_relo(nfp_prog
, BR_BLO
, BR_OFF_RELO
, 0, RELO_BR_GO_ABORT
);
819 tmp_reg
= re_load_imm_any(nfp_prog
, offset
, imm_b(nfp_prog
));
820 return data_ld(nfp_prog
, tmp_reg
, 0, size
);
824 data_stx_host_order(struct nfp_prog
*nfp_prog
, u8 dst_gpr
, swreg offset
,
829 for (i
= 0; i
* 4 < size
; i
++)
830 wrp_mov(nfp_prog
, reg_xfer(i
), reg_a(src_gpr
+ i
));
832 emit_cmd(nfp_prog
, CMD_TGT_WRITE8_SWAP
, CMD_MODE_32b
, 0,
833 reg_a(dst_gpr
), offset
, size
- 1, true);
839 data_st_host_order(struct nfp_prog
*nfp_prog
, u8 dst_gpr
, swreg offset
,
842 wrp_immed(nfp_prog
, reg_xfer(0), imm
);
844 wrp_immed(nfp_prog
, reg_xfer(1), imm
>> 32);
846 emit_cmd(nfp_prog
, CMD_TGT_WRITE8_SWAP
, CMD_MODE_32b
, 0,
847 reg_a(dst_gpr
), offset
, size
- 1, true);
853 (*lmem_step
)(struct nfp_prog
*nfp_prog
, u8 gpr
, u8 gpr_byte
, s32 off
,
854 unsigned int size
, bool first
, bool new_gpr
, bool last
, bool lm3
,
858 wrp_lmem_load(struct nfp_prog
*nfp_prog
, u8 dst
, u8 dst_byte
, s32 off
,
859 unsigned int size
, bool first
, bool new_gpr
, bool last
, bool lm3
,
862 bool should_inc
= needs_inc
&& new_gpr
&& !last
;
869 if (WARN_ON_ONCE(dst_byte
+ size
> 4 || off
% 4 + size
> 4))
874 /* Move the entire word */
876 wrp_mov(nfp_prog
, reg_both(dst
),
877 should_inc
? reg_lm_inc(3) : reg_lm(lm3
? 3 : 0, idx
));
881 if (WARN_ON_ONCE(lm3
&& idx
> RE_REG_LM_IDX_MAX
))
886 mask
= (1 << size
) - 1;
889 if (WARN_ON_ONCE(mask
> 0xf))
892 shf
= abs(src_byte
- dst_byte
) * 8;
893 if (src_byte
== dst_byte
) {
895 } else if (src_byte
< dst_byte
) {
902 /* ld_field can address fewer indexes, if offset too large do RMW.
903 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes.
905 if (idx
<= RE_REG_LM_IDX_MAX
) {
906 reg
= reg_lm(lm3
? 3 : 0, idx
);
908 reg
= imm_a(nfp_prog
);
909 /* If it's not the first part of the load and we start a new GPR
910 * that means we are loading a second part of the LMEM word into
911 * a new GPR. IOW we've already looked that LMEM word and
912 * therefore it has been loaded into imm_a().
914 if (first
|| !new_gpr
)
915 wrp_mov(nfp_prog
, reg
, reg_lm(0, idx
));
918 emit_ld_field_any(nfp_prog
, reg_both(dst
), mask
, reg
, sc
, shf
, new_gpr
);
921 wrp_mov(nfp_prog
, reg_none(), reg_lm_inc(3));
927 wrp_lmem_store(struct nfp_prog
*nfp_prog
, u8 src
, u8 src_byte
, s32 off
,
928 unsigned int size
, bool first
, bool new_gpr
, bool last
, bool lm3
,
931 bool should_inc
= needs_inc
&& new_gpr
&& !last
;
938 if (WARN_ON_ONCE(src_byte
+ size
> 4 || off
% 4 + size
> 4))
943 /* Move the entire word */
946 should_inc
? reg_lm_inc(3) : reg_lm(lm3
? 3 : 0, idx
),
951 if (WARN_ON_ONCE(lm3
&& idx
> RE_REG_LM_IDX_MAX
))
956 mask
= (1 << size
) - 1;
959 if (WARN_ON_ONCE(mask
> 0xf))
962 shf
= abs(src_byte
- dst_byte
) * 8;
963 if (src_byte
== dst_byte
) {
965 } else if (src_byte
< dst_byte
) {
972 /* ld_field can address fewer indexes, if offset too large do RMW.
973 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes.
975 if (idx
<= RE_REG_LM_IDX_MAX
) {
976 reg
= reg_lm(lm3
? 3 : 0, idx
);
978 reg
= imm_a(nfp_prog
);
979 /* Only first and last LMEM locations are going to need RMW,
980 * the middle location will be overwritten fully.
983 wrp_mov(nfp_prog
, reg
, reg_lm(0, idx
));
986 emit_ld_field(nfp_prog
, reg
, mask
, reg_b(src
), sc
, shf
);
988 if (new_gpr
|| last
) {
989 if (idx
> RE_REG_LM_IDX_MAX
)
990 wrp_mov(nfp_prog
, reg_lm(0, idx
), reg
);
992 wrp_mov(nfp_prog
, reg_none(), reg_lm_inc(3));
999 mem_op_stack(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1000 unsigned int size
, unsigned int ptr_off
, u8 gpr
, u8 ptr_gpr
,
1001 bool clr_gpr
, lmem_step step
)
1003 s32 off
= nfp_prog
->stack_depth
+ meta
->insn
.off
+ ptr_off
;
1004 bool first
= true, last
;
1005 bool needs_inc
= false;
1006 swreg stack_off_reg
;
1012 if (meta
->ptr_not_const
) {
1013 /* Use of the last encountered ptr_off is OK, they all have
1014 * the same alignment. Depend on low bits of value being
1015 * discarded when written to LMaddr register.
1017 stack_off_reg
= ur_load_imm_any(nfp_prog
, meta
->insn
.off
,
1018 stack_imm(nfp_prog
));
1020 emit_alu(nfp_prog
, imm_b(nfp_prog
),
1021 reg_a(ptr_gpr
), ALU_OP_ADD
, stack_off_reg
);
1024 } else if (off
+ size
<= 64) {
1025 /* We can reach bottom 64B with LMaddr0 */
1027 } else if (round_down(off
, 32) == round_down(off
+ size
- 1, 32)) {
1028 /* We have to set up a new pointer. If we know the offset
1029 * and the entire access falls into a single 32 byte aligned
1030 * window we won't have to increment the LM pointer.
1031 * The 32 byte alignment is imporant because offset is ORed in
1032 * not added when doing *l$indexN[off].
1034 stack_off_reg
= ur_load_imm_any(nfp_prog
, round_down(off
, 32),
1035 stack_imm(nfp_prog
));
1036 emit_alu(nfp_prog
, imm_b(nfp_prog
),
1037 stack_reg(nfp_prog
), ALU_OP_ADD
, stack_off_reg
);
1041 stack_off_reg
= ur_load_imm_any(nfp_prog
, round_down(off
, 4),
1042 stack_imm(nfp_prog
));
1044 emit_alu(nfp_prog
, imm_b(nfp_prog
),
1045 stack_reg(nfp_prog
), ALU_OP_ADD
, stack_off_reg
);
1050 emit_csr_wr(nfp_prog
, imm_b(nfp_prog
), NFP_CSR_ACT_LM_ADDR3
);
1051 /* For size < 4 one slot will be filled by zeroing of upper. */
1052 wrp_nops(nfp_prog
, clr_gpr
&& size
< 8 ? 2 : 3);
1055 if (clr_gpr
&& size
< 8)
1056 wrp_immed(nfp_prog
, reg_both(gpr
+ 1), 0);
1062 slice_size
= min(size
, 4 - gpr_byte
);
1063 slice_end
= min(off
+ slice_size
, round_up(off
+ 1, 4));
1064 slice_size
= slice_end
- off
;
1066 last
= slice_size
== size
;
1071 ret
= step(nfp_prog
, gpr
, gpr_byte
, off
, slice_size
,
1072 first
, gpr
!= prev_gpr
, last
, lm3
, needs_inc
);
1079 gpr_byte
+= slice_size
;
1080 if (gpr_byte
>= 4) {
1093 wrp_alu_imm(struct nfp_prog
*nfp_prog
, u8 dst
, enum alu_op alu_op
, u32 imm
)
1097 if (alu_op
== ALU_OP_AND
) {
1099 wrp_immed(nfp_prog
, reg_both(dst
), 0);
1103 if (alu_op
== ALU_OP_OR
) {
1105 wrp_immed(nfp_prog
, reg_both(dst
), ~0U);
1109 if (alu_op
== ALU_OP_XOR
) {
1111 emit_alu(nfp_prog
, reg_both(dst
), reg_none(),
1112 ALU_OP_NOT
, reg_b(dst
));
1117 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
, imm_b(nfp_prog
));
1118 emit_alu(nfp_prog
, reg_both(dst
), reg_a(dst
), alu_op
, tmp_reg
);
1122 wrp_alu64_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1123 enum alu_op alu_op
, bool skip
)
1125 const struct bpf_insn
*insn
= &meta
->insn
;
1126 u64 imm
= insn
->imm
; /* sign extend */
1133 wrp_alu_imm(nfp_prog
, insn
->dst_reg
* 2, alu_op
, imm
& ~0U);
1134 wrp_alu_imm(nfp_prog
, insn
->dst_reg
* 2 + 1, alu_op
, imm
>> 32);
1140 wrp_alu64_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1143 u8 dst
= meta
->insn
.dst_reg
* 2, src
= meta
->insn
.src_reg
* 2;
1145 emit_alu(nfp_prog
, reg_both(dst
), reg_a(dst
), alu_op
, reg_b(src
));
1146 emit_alu(nfp_prog
, reg_both(dst
+ 1),
1147 reg_a(dst
+ 1), alu_op
, reg_b(src
+ 1));
1153 wrp_alu32_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1154 enum alu_op alu_op
, bool skip
)
1156 const struct bpf_insn
*insn
= &meta
->insn
;
1163 wrp_alu_imm(nfp_prog
, insn
->dst_reg
* 2, alu_op
, insn
->imm
);
1164 wrp_immed(nfp_prog
, reg_both(insn
->dst_reg
* 2 + 1), 0);
1170 wrp_alu32_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1173 u8 dst
= meta
->insn
.dst_reg
* 2, src
= meta
->insn
.src_reg
* 2;
1175 emit_alu(nfp_prog
, reg_both(dst
), reg_a(dst
), alu_op
, reg_b(src
));
1176 wrp_immed(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2 + 1), 0);
1182 wrp_test_reg_one(struct nfp_prog
*nfp_prog
, u8 dst
, enum alu_op alu_op
, u8 src
,
1183 enum br_mask br_mask
, u16 off
)
1185 emit_alu(nfp_prog
, reg_none(), reg_a(dst
), alu_op
, reg_b(src
));
1186 emit_br(nfp_prog
, br_mask
, off
, 0);
1190 wrp_test_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1191 enum alu_op alu_op
, enum br_mask br_mask
)
1193 const struct bpf_insn
*insn
= &meta
->insn
;
1195 wrp_test_reg_one(nfp_prog
, insn
->dst_reg
* 2, alu_op
,
1196 insn
->src_reg
* 2, br_mask
, insn
->off
);
1197 wrp_test_reg_one(nfp_prog
, insn
->dst_reg
* 2 + 1, alu_op
,
1198 insn
->src_reg
* 2 + 1, br_mask
, insn
->off
);
1204 wrp_cmp_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1205 enum br_mask br_mask
, bool swap
)
1207 const struct bpf_insn
*insn
= &meta
->insn
;
1208 u64 imm
= insn
->imm
; /* sign extend */
1209 u8 reg
= insn
->dst_reg
* 2;
1212 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
& ~0U, imm_b(nfp_prog
));
1214 emit_alu(nfp_prog
, reg_none(), reg_a(reg
), ALU_OP_SUB
, tmp_reg
);
1216 emit_alu(nfp_prog
, reg_none(), tmp_reg
, ALU_OP_SUB
, reg_a(reg
));
1218 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
>> 32, imm_b(nfp_prog
));
1220 emit_alu(nfp_prog
, reg_none(),
1221 reg_a(reg
+ 1), ALU_OP_SUB_C
, tmp_reg
);
1223 emit_alu(nfp_prog
, reg_none(),
1224 tmp_reg
, ALU_OP_SUB_C
, reg_a(reg
+ 1));
1226 emit_br(nfp_prog
, br_mask
, insn
->off
, 0);
1232 wrp_cmp_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1233 enum br_mask br_mask
, bool swap
)
1235 const struct bpf_insn
*insn
= &meta
->insn
;
1238 areg
= insn
->dst_reg
* 2;
1239 breg
= insn
->src_reg
* 2;
1247 emit_alu(nfp_prog
, reg_none(), reg_a(areg
), ALU_OP_SUB
, reg_b(breg
));
1248 emit_alu(nfp_prog
, reg_none(),
1249 reg_a(areg
+ 1), ALU_OP_SUB_C
, reg_b(breg
+ 1));
1250 emit_br(nfp_prog
, br_mask
, insn
->off
, 0);
1255 static void wrp_end32(struct nfp_prog
*nfp_prog
, swreg reg_in
, u8 gpr_out
)
1257 emit_ld_field(nfp_prog
, reg_both(gpr_out
), 0xf, reg_in
,
1259 emit_ld_field(nfp_prog
, reg_both(gpr_out
), 0x5, reg_a(gpr_out
),
1263 static int adjust_head(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1265 swreg tmp
= imm_a(nfp_prog
), tmp_len
= imm_b(nfp_prog
);
1266 struct nfp_bpf_cap_adjust_head
*adjust_head
;
1267 u32 ret_einval
, end
;
1269 adjust_head
= &nfp_prog
->bpf
->adjust_head
;
1271 /* Optimized version - 5 vs 14 cycles */
1272 if (nfp_prog
->adjust_head_location
!= UINT_MAX
) {
1273 if (WARN_ON_ONCE(nfp_prog
->adjust_head_location
!= meta
->n
))
1276 emit_alu(nfp_prog
, pptr_reg(nfp_prog
),
1277 reg_a(2 * 2), ALU_OP_ADD
, pptr_reg(nfp_prog
));
1278 emit_alu(nfp_prog
, plen_reg(nfp_prog
),
1279 plen_reg(nfp_prog
), ALU_OP_SUB
, reg_a(2 * 2));
1280 emit_alu(nfp_prog
, pv_len(nfp_prog
),
1281 pv_len(nfp_prog
), ALU_OP_SUB
, reg_a(2 * 2));
1283 wrp_immed(nfp_prog
, reg_both(0), 0);
1284 wrp_immed(nfp_prog
, reg_both(1), 0);
1286 /* TODO: when adjust head is guaranteed to succeed we can
1287 * also eliminate the following if (r0 == 0) branch.
1293 ret_einval
= nfp_prog_current_offset(nfp_prog
) + 14;
1294 end
= ret_einval
+ 2;
1296 /* We need to use a temp because offset is just a part of the pkt ptr */
1297 emit_alu(nfp_prog
, tmp
,
1298 reg_a(2 * 2), ALU_OP_ADD_2B
, pptr_reg(nfp_prog
));
1300 /* Validate result will fit within FW datapath constraints */
1301 emit_alu(nfp_prog
, reg_none(),
1302 tmp
, ALU_OP_SUB
, reg_imm(adjust_head
->off_min
));
1303 emit_br(nfp_prog
, BR_BLO
, ret_einval
, 0);
1304 emit_alu(nfp_prog
, reg_none(),
1305 reg_imm(adjust_head
->off_max
), ALU_OP_SUB
, tmp
);
1306 emit_br(nfp_prog
, BR_BLO
, ret_einval
, 0);
1308 /* Validate the length is at least ETH_HLEN */
1309 emit_alu(nfp_prog
, tmp_len
,
1310 plen_reg(nfp_prog
), ALU_OP_SUB
, reg_a(2 * 2));
1311 emit_alu(nfp_prog
, reg_none(),
1312 tmp_len
, ALU_OP_SUB
, reg_imm(ETH_HLEN
));
1313 emit_br(nfp_prog
, BR_BMI
, ret_einval
, 0);
1315 /* Load the ret code */
1316 wrp_immed(nfp_prog
, reg_both(0), 0);
1317 wrp_immed(nfp_prog
, reg_both(1), 0);
1319 /* Modify the packet metadata */
1320 emit_ld_field(nfp_prog
, pptr_reg(nfp_prog
), 0x3, tmp
, SHF_SC_NONE
, 0);
1322 /* Skip over the -EINVAL ret code (defer 2) */
1323 emit_br(nfp_prog
, BR_UNC
, end
, 2);
1325 emit_alu(nfp_prog
, plen_reg(nfp_prog
),
1326 plen_reg(nfp_prog
), ALU_OP_SUB
, reg_a(2 * 2));
1327 emit_alu(nfp_prog
, pv_len(nfp_prog
),
1328 pv_len(nfp_prog
), ALU_OP_SUB
, reg_a(2 * 2));
1330 /* return -EINVAL target */
1331 if (!nfp_prog_confirm_current_offset(nfp_prog
, ret_einval
))
1334 wrp_immed(nfp_prog
, reg_both(0), -22);
1335 wrp_immed(nfp_prog
, reg_both(1), ~0);
1337 if (!nfp_prog_confirm_current_offset(nfp_prog
, end
))
1344 map_lookup_stack(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1346 struct bpf_offloaded_map
*offmap
;
1347 struct nfp_bpf_map
*nfp_map
;
1353 offmap
= (struct bpf_offloaded_map
*)meta
->arg1
.map_ptr
;
1354 nfp_map
= offmap
->dev_priv
;
1356 /* We only have to reload LM0 if the key is not at start of stack */
1357 lm_off
= nfp_prog
->stack_depth
;
1358 lm_off
+= meta
->arg2
.var_off
.value
+ meta
->arg2
.off
;
1359 load_lm_ptr
= meta
->arg2_var_off
|| lm_off
;
1361 /* Set LM0 to start of key */
1363 emit_csr_wr(nfp_prog
, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0
);
1365 /* Load map ID into a register, it should actually fit as an immediate
1366 * but in case it doesn't deal with it here, not in the delay slots.
1368 tid
= ur_load_imm_any(nfp_prog
, nfp_map
->tid
, imm_a(nfp_prog
));
1370 emit_br_relo(nfp_prog
, BR_UNC
, BR_OFF_RELO
+ BPF_FUNC_map_lookup_elem
,
1372 ret_tgt
= nfp_prog_current_offset(nfp_prog
) + 2;
1374 /* Load map ID into A0 */
1375 wrp_mov(nfp_prog
, reg_a(0), tid
);
1377 /* Load the return address into B0 */
1378 wrp_immed_relo(nfp_prog
, reg_b(0), ret_tgt
, RELO_IMMED_REL
);
1380 if (!nfp_prog_confirm_current_offset(nfp_prog
, ret_tgt
))
1383 /* Reset the LM0 pointer */
1387 emit_csr_wr(nfp_prog
, stack_reg(nfp_prog
), NFP_CSR_ACT_LM_ADDR0
);
1388 wrp_nops(nfp_prog
, 3);
1393 /* --- Callbacks --- */
1394 static int mov_reg64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1396 const struct bpf_insn
*insn
= &meta
->insn
;
1397 u8 dst
= insn
->dst_reg
* 2;
1398 u8 src
= insn
->src_reg
* 2;
1400 if (insn
->src_reg
== BPF_REG_10
) {
1401 swreg stack_depth_reg
;
1403 stack_depth_reg
= ur_load_imm_any(nfp_prog
,
1404 nfp_prog
->stack_depth
,
1405 stack_imm(nfp_prog
));
1406 emit_alu(nfp_prog
, reg_both(dst
),
1407 stack_reg(nfp_prog
), ALU_OP_ADD
, stack_depth_reg
);
1408 wrp_immed(nfp_prog
, reg_both(dst
+ 1), 0);
1410 wrp_reg_mov(nfp_prog
, dst
, src
);
1411 wrp_reg_mov(nfp_prog
, dst
+ 1, src
+ 1);
1417 static int mov_imm64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1419 u64 imm
= meta
->insn
.imm
; /* sign extend */
1421 wrp_immed(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2), imm
& ~0U);
1422 wrp_immed(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2 + 1), imm
>> 32);
1427 static int xor_reg64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1429 return wrp_alu64_reg(nfp_prog
, meta
, ALU_OP_XOR
);
1432 static int xor_imm64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1434 return wrp_alu64_imm(nfp_prog
, meta
, ALU_OP_XOR
, !meta
->insn
.imm
);
1437 static int and_reg64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1439 return wrp_alu64_reg(nfp_prog
, meta
, ALU_OP_AND
);
1442 static int and_imm64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1444 return wrp_alu64_imm(nfp_prog
, meta
, ALU_OP_AND
, !~meta
->insn
.imm
);
1447 static int or_reg64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1449 return wrp_alu64_reg(nfp_prog
, meta
, ALU_OP_OR
);
1452 static int or_imm64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1454 return wrp_alu64_imm(nfp_prog
, meta
, ALU_OP_OR
, !meta
->insn
.imm
);
1457 static int add_reg64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1459 const struct bpf_insn
*insn
= &meta
->insn
;
1461 emit_alu(nfp_prog
, reg_both(insn
->dst_reg
* 2),
1462 reg_a(insn
->dst_reg
* 2), ALU_OP_ADD
,
1463 reg_b(insn
->src_reg
* 2));
1464 emit_alu(nfp_prog
, reg_both(insn
->dst_reg
* 2 + 1),
1465 reg_a(insn
->dst_reg
* 2 + 1), ALU_OP_ADD_C
,
1466 reg_b(insn
->src_reg
* 2 + 1));
1471 static int add_imm64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1473 const struct bpf_insn
*insn
= &meta
->insn
;
1474 u64 imm
= insn
->imm
; /* sign extend */
1476 wrp_alu_imm(nfp_prog
, insn
->dst_reg
* 2, ALU_OP_ADD
, imm
& ~0U);
1477 wrp_alu_imm(nfp_prog
, insn
->dst_reg
* 2 + 1, ALU_OP_ADD_C
, imm
>> 32);
1482 static int sub_reg64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1484 const struct bpf_insn
*insn
= &meta
->insn
;
1486 emit_alu(nfp_prog
, reg_both(insn
->dst_reg
* 2),
1487 reg_a(insn
->dst_reg
* 2), ALU_OP_SUB
,
1488 reg_b(insn
->src_reg
* 2));
1489 emit_alu(nfp_prog
, reg_both(insn
->dst_reg
* 2 + 1),
1490 reg_a(insn
->dst_reg
* 2 + 1), ALU_OP_SUB_C
,
1491 reg_b(insn
->src_reg
* 2 + 1));
1496 static int sub_imm64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1498 const struct bpf_insn
*insn
= &meta
->insn
;
1499 u64 imm
= insn
->imm
; /* sign extend */
1501 wrp_alu_imm(nfp_prog
, insn
->dst_reg
* 2, ALU_OP_SUB
, imm
& ~0U);
1502 wrp_alu_imm(nfp_prog
, insn
->dst_reg
* 2 + 1, ALU_OP_SUB_C
, imm
>> 32);
1507 static int neg_reg64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1509 const struct bpf_insn
*insn
= &meta
->insn
;
1511 emit_alu(nfp_prog
, reg_both(insn
->dst_reg
* 2), reg_imm(0),
1512 ALU_OP_SUB
, reg_b(insn
->dst_reg
* 2));
1513 emit_alu(nfp_prog
, reg_both(insn
->dst_reg
* 2 + 1), reg_imm(0),
1514 ALU_OP_SUB_C
, reg_b(insn
->dst_reg
* 2 + 1));
1519 static int shl_imm64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1521 const struct bpf_insn
*insn
= &meta
->insn
;
1522 u8 dst
= insn
->dst_reg
* 2;
1524 if (insn
->imm
< 32) {
1525 emit_shf(nfp_prog
, reg_both(dst
+ 1),
1526 reg_a(dst
+ 1), SHF_OP_NONE
, reg_b(dst
),
1527 SHF_SC_R_DSHF
, 32 - insn
->imm
);
1528 emit_shf(nfp_prog
, reg_both(dst
),
1529 reg_none(), SHF_OP_NONE
, reg_b(dst
),
1530 SHF_SC_L_SHF
, insn
->imm
);
1531 } else if (insn
->imm
== 32) {
1532 wrp_reg_mov(nfp_prog
, dst
+ 1, dst
);
1533 wrp_immed(nfp_prog
, reg_both(dst
), 0);
1534 } else if (insn
->imm
> 32) {
1535 emit_shf(nfp_prog
, reg_both(dst
+ 1),
1536 reg_none(), SHF_OP_NONE
, reg_b(dst
),
1537 SHF_SC_L_SHF
, insn
->imm
- 32);
1538 wrp_immed(nfp_prog
, reg_both(dst
), 0);
1544 static int shr_imm64(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1546 const struct bpf_insn
*insn
= &meta
->insn
;
1547 u8 dst
= insn
->dst_reg
* 2;
1549 if (insn
->imm
< 32) {
1550 emit_shf(nfp_prog
, reg_both(dst
),
1551 reg_a(dst
+ 1), SHF_OP_NONE
, reg_b(dst
),
1552 SHF_SC_R_DSHF
, insn
->imm
);
1553 emit_shf(nfp_prog
, reg_both(dst
+ 1),
1554 reg_none(), SHF_OP_NONE
, reg_b(dst
+ 1),
1555 SHF_SC_R_SHF
, insn
->imm
);
1556 } else if (insn
->imm
== 32) {
1557 wrp_reg_mov(nfp_prog
, dst
, dst
+ 1);
1558 wrp_immed(nfp_prog
, reg_both(dst
+ 1), 0);
1559 } else if (insn
->imm
> 32) {
1560 emit_shf(nfp_prog
, reg_both(dst
),
1561 reg_none(), SHF_OP_NONE
, reg_b(dst
+ 1),
1562 SHF_SC_R_SHF
, insn
->imm
- 32);
1563 wrp_immed(nfp_prog
, reg_both(dst
+ 1), 0);
1569 static int mov_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1571 const struct bpf_insn
*insn
= &meta
->insn
;
1573 wrp_reg_mov(nfp_prog
, insn
->dst_reg
* 2, insn
->src_reg
* 2);
1574 wrp_immed(nfp_prog
, reg_both(insn
->dst_reg
* 2 + 1), 0);
1579 static int mov_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1581 const struct bpf_insn
*insn
= &meta
->insn
;
1583 wrp_immed(nfp_prog
, reg_both(insn
->dst_reg
* 2), insn
->imm
);
1584 wrp_immed(nfp_prog
, reg_both(insn
->dst_reg
* 2 + 1), 0);
1589 static int xor_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1591 return wrp_alu32_reg(nfp_prog
, meta
, ALU_OP_XOR
);
1594 static int xor_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1596 return wrp_alu32_imm(nfp_prog
, meta
, ALU_OP_XOR
, !~meta
->insn
.imm
);
1599 static int and_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1601 return wrp_alu32_reg(nfp_prog
, meta
, ALU_OP_AND
);
1604 static int and_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1606 return wrp_alu32_imm(nfp_prog
, meta
, ALU_OP_AND
, !~meta
->insn
.imm
);
1609 static int or_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1611 return wrp_alu32_reg(nfp_prog
, meta
, ALU_OP_OR
);
1614 static int or_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1616 return wrp_alu32_imm(nfp_prog
, meta
, ALU_OP_OR
, !meta
->insn
.imm
);
1619 static int add_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1621 return wrp_alu32_reg(nfp_prog
, meta
, ALU_OP_ADD
);
1624 static int add_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1626 return wrp_alu32_imm(nfp_prog
, meta
, ALU_OP_ADD
, !meta
->insn
.imm
);
1629 static int sub_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1631 return wrp_alu32_reg(nfp_prog
, meta
, ALU_OP_SUB
);
1634 static int sub_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1636 return wrp_alu32_imm(nfp_prog
, meta
, ALU_OP_SUB
, !meta
->insn
.imm
);
1639 static int neg_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1641 u8 dst
= meta
->insn
.dst_reg
* 2;
1643 emit_alu(nfp_prog
, reg_both(dst
), reg_imm(0), ALU_OP_SUB
, reg_b(dst
));
1644 wrp_immed(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2 + 1), 0);
1649 static int shl_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1651 const struct bpf_insn
*insn
= &meta
->insn
;
1654 return 1; /* TODO: zero shift means indirect */
1656 emit_shf(nfp_prog
, reg_both(insn
->dst_reg
* 2),
1657 reg_none(), SHF_OP_NONE
, reg_b(insn
->dst_reg
* 2),
1658 SHF_SC_L_SHF
, insn
->imm
);
1659 wrp_immed(nfp_prog
, reg_both(insn
->dst_reg
* 2 + 1), 0);
1664 static int end_reg32(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1666 const struct bpf_insn
*insn
= &meta
->insn
;
1667 u8 gpr
= insn
->dst_reg
* 2;
1669 switch (insn
->imm
) {
1671 emit_ld_field(nfp_prog
, reg_both(gpr
), 0x9, reg_b(gpr
),
1673 emit_ld_field(nfp_prog
, reg_both(gpr
), 0xe, reg_a(gpr
),
1676 wrp_immed(nfp_prog
, reg_both(gpr
+ 1), 0);
1679 wrp_end32(nfp_prog
, reg_a(gpr
), gpr
);
1680 wrp_immed(nfp_prog
, reg_both(gpr
+ 1), 0);
1683 wrp_mov(nfp_prog
, imm_a(nfp_prog
), reg_b(gpr
+ 1));
1685 wrp_end32(nfp_prog
, reg_a(gpr
), gpr
+ 1);
1686 wrp_end32(nfp_prog
, imm_a(nfp_prog
), gpr
);
1693 static int imm_ld8_part2(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1695 struct nfp_insn_meta
*prev
= nfp_meta_prev(meta
);
1699 dst
= prev
->insn
.dst_reg
* 2;
1700 imm_lo
= prev
->insn
.imm
;
1701 imm_hi
= meta
->insn
.imm
;
1703 wrp_immed(nfp_prog
, reg_both(dst
), imm_lo
);
1705 /* mov is always 1 insn, load imm may be two, so try to use mov */
1706 if (imm_hi
== imm_lo
)
1707 wrp_mov(nfp_prog
, reg_both(dst
+ 1), reg_a(dst
));
1709 wrp_immed(nfp_prog
, reg_both(dst
+ 1), imm_hi
);
1714 static int imm_ld8(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1716 meta
->double_cb
= imm_ld8_part2
;
1720 static int data_ld1(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1722 return construct_data_ld(nfp_prog
, meta
->insn
.imm
, 1);
1725 static int data_ld2(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1727 return construct_data_ld(nfp_prog
, meta
->insn
.imm
, 2);
1730 static int data_ld4(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1732 return construct_data_ld(nfp_prog
, meta
->insn
.imm
, 4);
1735 static int data_ind_ld1(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1737 return construct_data_ind_ld(nfp_prog
, meta
->insn
.imm
,
1738 meta
->insn
.src_reg
* 2, 1);
1741 static int data_ind_ld2(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1743 return construct_data_ind_ld(nfp_prog
, meta
->insn
.imm
,
1744 meta
->insn
.src_reg
* 2, 2);
1747 static int data_ind_ld4(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1749 return construct_data_ind_ld(nfp_prog
, meta
->insn
.imm
,
1750 meta
->insn
.src_reg
* 2, 4);
1754 mem_ldx_stack(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1755 unsigned int size
, unsigned int ptr_off
)
1757 return mem_op_stack(nfp_prog
, meta
, size
, ptr_off
,
1758 meta
->insn
.dst_reg
* 2, meta
->insn
.src_reg
* 2,
1759 true, wrp_lmem_load
);
1762 static int mem_ldx_skb(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1765 swreg dst
= reg_both(meta
->insn
.dst_reg
* 2);
1767 switch (meta
->insn
.off
) {
1768 case offsetof(struct __sk_buff
, len
):
1769 if (size
!= FIELD_SIZEOF(struct __sk_buff
, len
))
1771 wrp_mov(nfp_prog
, dst
, plen_reg(nfp_prog
));
1773 case offsetof(struct __sk_buff
, data
):
1774 if (size
!= FIELD_SIZEOF(struct __sk_buff
, data
))
1776 wrp_mov(nfp_prog
, dst
, pptr_reg(nfp_prog
));
1778 case offsetof(struct __sk_buff
, data_end
):
1779 if (size
!= FIELD_SIZEOF(struct __sk_buff
, data_end
))
1781 emit_alu(nfp_prog
, dst
,
1782 plen_reg(nfp_prog
), ALU_OP_ADD
, pptr_reg(nfp_prog
));
1788 wrp_immed(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2 + 1), 0);
1793 static int mem_ldx_xdp(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1796 swreg dst
= reg_both(meta
->insn
.dst_reg
* 2);
1798 switch (meta
->insn
.off
) {
1799 case offsetof(struct xdp_md
, data
):
1800 if (size
!= FIELD_SIZEOF(struct xdp_md
, data
))
1802 wrp_mov(nfp_prog
, dst
, pptr_reg(nfp_prog
));
1804 case offsetof(struct xdp_md
, data_end
):
1805 if (size
!= FIELD_SIZEOF(struct xdp_md
, data_end
))
1807 emit_alu(nfp_prog
, dst
,
1808 plen_reg(nfp_prog
), ALU_OP_ADD
, pptr_reg(nfp_prog
));
1814 wrp_immed(nfp_prog
, reg_both(meta
->insn
.dst_reg
* 2 + 1), 0);
1820 mem_ldx_data(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1825 tmp_reg
= re_load_imm_any(nfp_prog
, meta
->insn
.off
, imm_b(nfp_prog
));
1827 return data_ld_host_order_addr32(nfp_prog
, meta
->insn
.src_reg
* 2,
1828 tmp_reg
, meta
->insn
.dst_reg
* 2, size
);
1832 mem_ldx_emem(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1837 tmp_reg
= re_load_imm_any(nfp_prog
, meta
->insn
.off
, imm_b(nfp_prog
));
1839 return data_ld_host_order_addr40(nfp_prog
, meta
->insn
.src_reg
* 2,
1840 tmp_reg
, meta
->insn
.dst_reg
* 2, size
);
1844 mem_ldx(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1847 if (meta
->ldst_gather_len
)
1848 return nfp_cpp_memcpy(nfp_prog
, meta
);
1850 if (meta
->ptr
.type
== PTR_TO_CTX
) {
1851 if (nfp_prog
->type
== BPF_PROG_TYPE_XDP
)
1852 return mem_ldx_xdp(nfp_prog
, meta
, size
);
1854 return mem_ldx_skb(nfp_prog
, meta
, size
);
1857 if (meta
->ptr
.type
== PTR_TO_PACKET
)
1858 return mem_ldx_data(nfp_prog
, meta
, size
);
1860 if (meta
->ptr
.type
== PTR_TO_STACK
)
1861 return mem_ldx_stack(nfp_prog
, meta
, size
,
1862 meta
->ptr
.off
+ meta
->ptr
.var_off
.value
);
1864 if (meta
->ptr
.type
== PTR_TO_MAP_VALUE
)
1865 return mem_ldx_emem(nfp_prog
, meta
, size
);
1870 static int mem_ldx1(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1872 return mem_ldx(nfp_prog
, meta
, 1);
1875 static int mem_ldx2(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1877 return mem_ldx(nfp_prog
, meta
, 2);
1880 static int mem_ldx4(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1882 return mem_ldx(nfp_prog
, meta
, 4);
1885 static int mem_ldx8(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1887 return mem_ldx(nfp_prog
, meta
, 8);
1891 mem_st_data(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1894 u64 imm
= meta
->insn
.imm
; /* sign extend */
1897 off_reg
= re_load_imm_any(nfp_prog
, meta
->insn
.off
, imm_b(nfp_prog
));
1899 return data_st_host_order(nfp_prog
, meta
->insn
.dst_reg
* 2, off_reg
,
1903 static int mem_st(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1906 if (meta
->ptr
.type
== PTR_TO_PACKET
)
1907 return mem_st_data(nfp_prog
, meta
, size
);
1912 static int mem_st1(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1914 return mem_st(nfp_prog
, meta
, 1);
1917 static int mem_st2(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1919 return mem_st(nfp_prog
, meta
, 2);
1922 static int mem_st4(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1924 return mem_st(nfp_prog
, meta
, 4);
1927 static int mem_st8(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1929 return mem_st(nfp_prog
, meta
, 8);
1933 mem_stx_data(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1938 off_reg
= re_load_imm_any(nfp_prog
, meta
->insn
.off
, imm_b(nfp_prog
));
1940 return data_stx_host_order(nfp_prog
, meta
->insn
.dst_reg
* 2, off_reg
,
1941 meta
->insn
.src_reg
* 2, size
);
1945 mem_stx_stack(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1946 unsigned int size
, unsigned int ptr_off
)
1948 return mem_op_stack(nfp_prog
, meta
, size
, ptr_off
,
1949 meta
->insn
.src_reg
* 2, meta
->insn
.dst_reg
* 2,
1950 false, wrp_lmem_store
);
1954 mem_stx(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
,
1957 if (meta
->ptr
.type
== PTR_TO_PACKET
)
1958 return mem_stx_data(nfp_prog
, meta
, size
);
1960 if (meta
->ptr
.type
== PTR_TO_STACK
)
1961 return mem_stx_stack(nfp_prog
, meta
, size
,
1962 meta
->ptr
.off
+ meta
->ptr
.var_off
.value
);
1967 static int mem_stx1(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1969 return mem_stx(nfp_prog
, meta
, 1);
1972 static int mem_stx2(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1974 return mem_stx(nfp_prog
, meta
, 2);
1977 static int mem_stx4(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1979 return mem_stx(nfp_prog
, meta
, 4);
1982 static int mem_stx8(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1984 return mem_stx(nfp_prog
, meta
, 8);
1987 static int jump(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1989 emit_br(nfp_prog
, BR_UNC
, meta
->insn
.off
, 0);
1994 static int jeq_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
1996 const struct bpf_insn
*insn
= &meta
->insn
;
1997 u64 imm
= insn
->imm
; /* sign extend */
1998 swreg or1
, or2
, tmp_reg
;
2000 or1
= reg_a(insn
->dst_reg
* 2);
2001 or2
= reg_b(insn
->dst_reg
* 2 + 1);
2004 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
& ~0U, imm_b(nfp_prog
));
2005 emit_alu(nfp_prog
, imm_a(nfp_prog
),
2006 reg_a(insn
->dst_reg
* 2), ALU_OP_XOR
, tmp_reg
);
2007 or1
= imm_a(nfp_prog
);
2011 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
>> 32, imm_b(nfp_prog
));
2012 emit_alu(nfp_prog
, imm_b(nfp_prog
),
2013 reg_a(insn
->dst_reg
* 2 + 1), ALU_OP_XOR
, tmp_reg
);
2014 or2
= imm_b(nfp_prog
);
2017 emit_alu(nfp_prog
, reg_none(), or1
, ALU_OP_OR
, or2
);
2018 emit_br(nfp_prog
, BR_BEQ
, insn
->off
, 0);
2023 static int jgt_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2025 return wrp_cmp_imm(nfp_prog
, meta
, BR_BLO
, true);
2028 static int jge_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2030 return wrp_cmp_imm(nfp_prog
, meta
, BR_BHS
, false);
2033 static int jlt_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2035 return wrp_cmp_imm(nfp_prog
, meta
, BR_BLO
, false);
2038 static int jle_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2040 return wrp_cmp_imm(nfp_prog
, meta
, BR_BHS
, true);
2043 static int jsgt_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2045 return wrp_cmp_imm(nfp_prog
, meta
, BR_BLT
, true);
2048 static int jsge_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2050 return wrp_cmp_imm(nfp_prog
, meta
, BR_BGE
, false);
2053 static int jslt_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2055 return wrp_cmp_imm(nfp_prog
, meta
, BR_BLT
, false);
2058 static int jsle_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2060 return wrp_cmp_imm(nfp_prog
, meta
, BR_BGE
, true);
2063 static int jset_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2065 const struct bpf_insn
*insn
= &meta
->insn
;
2066 u64 imm
= insn
->imm
; /* sign extend */
2075 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
& ~0U, imm_b(nfp_prog
));
2076 emit_alu(nfp_prog
, reg_none(),
2077 reg_a(insn
->dst_reg
* 2), ALU_OP_AND
, tmp_reg
);
2078 emit_br(nfp_prog
, BR_BNE
, insn
->off
, 0);
2082 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
>> 32, imm_b(nfp_prog
));
2083 emit_alu(nfp_prog
, reg_none(),
2084 reg_a(insn
->dst_reg
* 2 + 1), ALU_OP_AND
, tmp_reg
);
2085 emit_br(nfp_prog
, BR_BNE
, insn
->off
, 0);
2091 static int jne_imm(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2093 const struct bpf_insn
*insn
= &meta
->insn
;
2094 u64 imm
= insn
->imm
; /* sign extend */
2098 emit_alu(nfp_prog
, reg_none(), reg_a(insn
->dst_reg
* 2),
2099 ALU_OP_OR
, reg_b(insn
->dst_reg
* 2 + 1));
2100 emit_br(nfp_prog
, BR_BNE
, insn
->off
, 0);
2104 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
& ~0U, imm_b(nfp_prog
));
2105 emit_alu(nfp_prog
, reg_none(),
2106 reg_a(insn
->dst_reg
* 2), ALU_OP_XOR
, tmp_reg
);
2107 emit_br(nfp_prog
, BR_BNE
, insn
->off
, 0);
2109 tmp_reg
= ur_load_imm_any(nfp_prog
, imm
>> 32, imm_b(nfp_prog
));
2110 emit_alu(nfp_prog
, reg_none(),
2111 reg_a(insn
->dst_reg
* 2 + 1), ALU_OP_XOR
, tmp_reg
);
2112 emit_br(nfp_prog
, BR_BNE
, insn
->off
, 0);
2117 static int jeq_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2119 const struct bpf_insn
*insn
= &meta
->insn
;
2121 emit_alu(nfp_prog
, imm_a(nfp_prog
), reg_a(insn
->dst_reg
* 2),
2122 ALU_OP_XOR
, reg_b(insn
->src_reg
* 2));
2123 emit_alu(nfp_prog
, imm_b(nfp_prog
), reg_a(insn
->dst_reg
* 2 + 1),
2124 ALU_OP_XOR
, reg_b(insn
->src_reg
* 2 + 1));
2125 emit_alu(nfp_prog
, reg_none(),
2126 imm_a(nfp_prog
), ALU_OP_OR
, imm_b(nfp_prog
));
2127 emit_br(nfp_prog
, BR_BEQ
, insn
->off
, 0);
2132 static int jgt_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2134 return wrp_cmp_reg(nfp_prog
, meta
, BR_BLO
, true);
2137 static int jge_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2139 return wrp_cmp_reg(nfp_prog
, meta
, BR_BHS
, false);
2142 static int jlt_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2144 return wrp_cmp_reg(nfp_prog
, meta
, BR_BLO
, false);
2147 static int jle_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2149 return wrp_cmp_reg(nfp_prog
, meta
, BR_BHS
, true);
2152 static int jsgt_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2154 return wrp_cmp_reg(nfp_prog
, meta
, BR_BLT
, true);
2157 static int jsge_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2159 return wrp_cmp_reg(nfp_prog
, meta
, BR_BGE
, false);
2162 static int jslt_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2164 return wrp_cmp_reg(nfp_prog
, meta
, BR_BLT
, false);
2167 static int jsle_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2169 return wrp_cmp_reg(nfp_prog
, meta
, BR_BGE
, true);
2172 static int jset_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2174 return wrp_test_reg(nfp_prog
, meta
, ALU_OP_AND
, BR_BNE
);
2177 static int jne_reg(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2179 return wrp_test_reg(nfp_prog
, meta
, ALU_OP_XOR
, BR_BNE
);
2182 static int call(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2184 switch (meta
->insn
.imm
) {
2185 case BPF_FUNC_xdp_adjust_head
:
2186 return adjust_head(nfp_prog
, meta
);
2187 case BPF_FUNC_map_lookup_elem
:
2188 return map_lookup_stack(nfp_prog
, meta
);
2190 WARN_ONCE(1, "verifier allowed unsupported function\n");
2195 static int goto_out(struct nfp_prog
*nfp_prog
, struct nfp_insn_meta
*meta
)
2197 emit_br_relo(nfp_prog
, BR_UNC
, BR_OFF_RELO
, 0, RELO_BR_GO_OUT
);
2202 static const instr_cb_t instr_cb
[256] = {
2203 [BPF_ALU64
| BPF_MOV
| BPF_X
] = mov_reg64
,
2204 [BPF_ALU64
| BPF_MOV
| BPF_K
] = mov_imm64
,
2205 [BPF_ALU64
| BPF_XOR
| BPF_X
] = xor_reg64
,
2206 [BPF_ALU64
| BPF_XOR
| BPF_K
] = xor_imm64
,
2207 [BPF_ALU64
| BPF_AND
| BPF_X
] = and_reg64
,
2208 [BPF_ALU64
| BPF_AND
| BPF_K
] = and_imm64
,
2209 [BPF_ALU64
| BPF_OR
| BPF_X
] = or_reg64
,
2210 [BPF_ALU64
| BPF_OR
| BPF_K
] = or_imm64
,
2211 [BPF_ALU64
| BPF_ADD
| BPF_X
] = add_reg64
,
2212 [BPF_ALU64
| BPF_ADD
| BPF_K
] = add_imm64
,
2213 [BPF_ALU64
| BPF_SUB
| BPF_X
] = sub_reg64
,
2214 [BPF_ALU64
| BPF_SUB
| BPF_K
] = sub_imm64
,
2215 [BPF_ALU64
| BPF_NEG
] = neg_reg64
,
2216 [BPF_ALU64
| BPF_LSH
| BPF_K
] = shl_imm64
,
2217 [BPF_ALU64
| BPF_RSH
| BPF_K
] = shr_imm64
,
2218 [BPF_ALU
| BPF_MOV
| BPF_X
] = mov_reg
,
2219 [BPF_ALU
| BPF_MOV
| BPF_K
] = mov_imm
,
2220 [BPF_ALU
| BPF_XOR
| BPF_X
] = xor_reg
,
2221 [BPF_ALU
| BPF_XOR
| BPF_K
] = xor_imm
,
2222 [BPF_ALU
| BPF_AND
| BPF_X
] = and_reg
,
2223 [BPF_ALU
| BPF_AND
| BPF_K
] = and_imm
,
2224 [BPF_ALU
| BPF_OR
| BPF_X
] = or_reg
,
2225 [BPF_ALU
| BPF_OR
| BPF_K
] = or_imm
,
2226 [BPF_ALU
| BPF_ADD
| BPF_X
] = add_reg
,
2227 [BPF_ALU
| BPF_ADD
| BPF_K
] = add_imm
,
2228 [BPF_ALU
| BPF_SUB
| BPF_X
] = sub_reg
,
2229 [BPF_ALU
| BPF_SUB
| BPF_K
] = sub_imm
,
2230 [BPF_ALU
| BPF_NEG
] = neg_reg
,
2231 [BPF_ALU
| BPF_LSH
| BPF_K
] = shl_imm
,
2232 [BPF_ALU
| BPF_END
| BPF_X
] = end_reg32
,
2233 [BPF_LD
| BPF_IMM
| BPF_DW
] = imm_ld8
,
2234 [BPF_LD
| BPF_ABS
| BPF_B
] = data_ld1
,
2235 [BPF_LD
| BPF_ABS
| BPF_H
] = data_ld2
,
2236 [BPF_LD
| BPF_ABS
| BPF_W
] = data_ld4
,
2237 [BPF_LD
| BPF_IND
| BPF_B
] = data_ind_ld1
,
2238 [BPF_LD
| BPF_IND
| BPF_H
] = data_ind_ld2
,
2239 [BPF_LD
| BPF_IND
| BPF_W
] = data_ind_ld4
,
2240 [BPF_LDX
| BPF_MEM
| BPF_B
] = mem_ldx1
,
2241 [BPF_LDX
| BPF_MEM
| BPF_H
] = mem_ldx2
,
2242 [BPF_LDX
| BPF_MEM
| BPF_W
] = mem_ldx4
,
2243 [BPF_LDX
| BPF_MEM
| BPF_DW
] = mem_ldx8
,
2244 [BPF_STX
| BPF_MEM
| BPF_B
] = mem_stx1
,
2245 [BPF_STX
| BPF_MEM
| BPF_H
] = mem_stx2
,
2246 [BPF_STX
| BPF_MEM
| BPF_W
] = mem_stx4
,
2247 [BPF_STX
| BPF_MEM
| BPF_DW
] = mem_stx8
,
2248 [BPF_ST
| BPF_MEM
| BPF_B
] = mem_st1
,
2249 [BPF_ST
| BPF_MEM
| BPF_H
] = mem_st2
,
2250 [BPF_ST
| BPF_MEM
| BPF_W
] = mem_st4
,
2251 [BPF_ST
| BPF_MEM
| BPF_DW
] = mem_st8
,
2252 [BPF_JMP
| BPF_JA
| BPF_K
] = jump
,
2253 [BPF_JMP
| BPF_JEQ
| BPF_K
] = jeq_imm
,
2254 [BPF_JMP
| BPF_JGT
| BPF_K
] = jgt_imm
,
2255 [BPF_JMP
| BPF_JGE
| BPF_K
] = jge_imm
,
2256 [BPF_JMP
| BPF_JLT
| BPF_K
] = jlt_imm
,
2257 [BPF_JMP
| BPF_JLE
| BPF_K
] = jle_imm
,
2258 [BPF_JMP
| BPF_JSGT
| BPF_K
] = jsgt_imm
,
2259 [BPF_JMP
| BPF_JSGE
| BPF_K
] = jsge_imm
,
2260 [BPF_JMP
| BPF_JSLT
| BPF_K
] = jslt_imm
,
2261 [BPF_JMP
| BPF_JSLE
| BPF_K
] = jsle_imm
,
2262 [BPF_JMP
| BPF_JSET
| BPF_K
] = jset_imm
,
2263 [BPF_JMP
| BPF_JNE
| BPF_K
] = jne_imm
,
2264 [BPF_JMP
| BPF_JEQ
| BPF_X
] = jeq_reg
,
2265 [BPF_JMP
| BPF_JGT
| BPF_X
] = jgt_reg
,
2266 [BPF_JMP
| BPF_JGE
| BPF_X
] = jge_reg
,
2267 [BPF_JMP
| BPF_JLT
| BPF_X
] = jlt_reg
,
2268 [BPF_JMP
| BPF_JLE
| BPF_X
] = jle_reg
,
2269 [BPF_JMP
| BPF_JSGT
| BPF_X
] = jsgt_reg
,
2270 [BPF_JMP
| BPF_JSGE
| BPF_X
] = jsge_reg
,
2271 [BPF_JMP
| BPF_JSLT
| BPF_X
] = jslt_reg
,
2272 [BPF_JMP
| BPF_JSLE
| BPF_X
] = jsle_reg
,
2273 [BPF_JMP
| BPF_JSET
| BPF_X
] = jset_reg
,
2274 [BPF_JMP
| BPF_JNE
| BPF_X
] = jne_reg
,
2275 [BPF_JMP
| BPF_CALL
] = call
,
2276 [BPF_JMP
| BPF_EXIT
] = goto_out
,
2279 /* --- Assembler logic --- */
2280 static int nfp_fixup_branches(struct nfp_prog
*nfp_prog
)
2282 struct nfp_insn_meta
*meta
, *jmp_dst
;
2285 list_for_each_entry(meta
, &nfp_prog
->insns
, l
) {
2288 if (meta
->insn
.code
== (BPF_JMP
| BPF_CALL
))
2290 if (BPF_CLASS(meta
->insn
.code
) != BPF_JMP
)
2293 if (list_is_last(&meta
->l
, &nfp_prog
->insns
))
2294 br_idx
= nfp_prog
->last_bpf_off
;
2296 br_idx
= list_next_entry(meta
, l
)->off
- 1;
2298 if (!nfp_is_br(nfp_prog
->prog
[br_idx
])) {
2299 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
2300 br_idx
, meta
->insn
.code
, nfp_prog
->prog
[br_idx
]);
2303 /* Leave special branches for later */
2304 if (FIELD_GET(OP_RELO_TYPE
, nfp_prog
->prog
[br_idx
]) !=
2308 if (!meta
->jmp_dst
) {
2309 pr_err("Non-exit jump doesn't have destination info recorded!!\n");
2313 jmp_dst
= meta
->jmp_dst
;
2315 if (jmp_dst
->skip
) {
2316 pr_err("Branch landing on removed instruction!!\n");
2320 for (idx
= meta
->off
; idx
<= br_idx
; idx
++) {
2321 if (!nfp_is_br(nfp_prog
->prog
[idx
]))
2323 br_set_offset(&nfp_prog
->prog
[idx
], jmp_dst
->off
);
2330 static void nfp_intro(struct nfp_prog
*nfp_prog
)
2332 wrp_immed(nfp_prog
, plen_reg(nfp_prog
), GENMASK(13, 0));
2333 emit_alu(nfp_prog
, plen_reg(nfp_prog
),
2334 plen_reg(nfp_prog
), ALU_OP_AND
, pv_len(nfp_prog
));
2337 static void nfp_outro_tc_da(struct nfp_prog
*nfp_prog
)
2339 /* TC direct-action mode:
2340 * 0,1 ok NOT SUPPORTED[1]
2341 * 2 drop 0x22 -> drop, count as stat1
2342 * 4,5 nuke 0x02 -> drop
2343 * 7 redir 0x44 -> redir, count as stat2
2344 * * unspec 0x11 -> pass, count as stat0
2346 * [1] We can't support OK and RECLASSIFY because we can't tell TC
2347 * the exact decision made. We are forced to support UNSPEC
2348 * to handle aborts so that's the only one we handle for passing
2349 * packets up the stack.
2351 /* Target for aborts */
2352 nfp_prog
->tgt_abort
= nfp_prog_current_offset(nfp_prog
);
2354 emit_br_relo(nfp_prog
, BR_UNC
, BR_OFF_RELO
, 2, RELO_BR_NEXT_PKT
);
2356 wrp_mov(nfp_prog
, reg_a(0), NFP_BPF_ABI_FLAGS
);
2357 emit_ld_field(nfp_prog
, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF
, 16);
2359 /* Target for normal exits */
2360 nfp_prog
->tgt_out
= nfp_prog_current_offset(nfp_prog
);
2362 /* if R0 > 7 jump to abort */
2363 emit_alu(nfp_prog
, reg_none(), reg_imm(7), ALU_OP_SUB
, reg_b(0));
2364 emit_br(nfp_prog
, BR_BLO
, nfp_prog
->tgt_abort
, 0);
2365 wrp_mov(nfp_prog
, reg_a(0), NFP_BPF_ABI_FLAGS
);
2367 wrp_immed(nfp_prog
, reg_b(2), 0x41221211);
2368 wrp_immed(nfp_prog
, reg_b(3), 0x41001211);
2370 emit_shf(nfp_prog
, reg_a(1),
2371 reg_none(), SHF_OP_NONE
, reg_b(0), SHF_SC_L_SHF
, 2);
2373 emit_alu(nfp_prog
, reg_none(), reg_a(1), ALU_OP_OR
, reg_imm(0));
2374 emit_shf(nfp_prog
, reg_a(2),
2375 reg_imm(0xf), SHF_OP_AND
, reg_b(2), SHF_SC_R_SHF
, 0);
2377 emit_alu(nfp_prog
, reg_none(), reg_a(1), ALU_OP_OR
, reg_imm(0));
2378 emit_shf(nfp_prog
, reg_b(2),
2379 reg_imm(0xf), SHF_OP_AND
, reg_b(3), SHF_SC_R_SHF
, 0);
2381 emit_br_relo(nfp_prog
, BR_UNC
, BR_OFF_RELO
, 2, RELO_BR_NEXT_PKT
);
2383 emit_shf(nfp_prog
, reg_b(2),
2384 reg_a(2), SHF_OP_OR
, reg_b(2), SHF_SC_L_SHF
, 4);
2385 emit_ld_field(nfp_prog
, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF
, 16);
2388 static void nfp_outro_xdp(struct nfp_prog
*nfp_prog
)
2390 /* XDP return codes:
2391 * 0 aborted 0x82 -> drop, count as stat3
2392 * 1 drop 0x22 -> drop, count as stat1
2393 * 2 pass 0x11 -> pass, count as stat0
2394 * 3 tx 0x44 -> redir, count as stat2
2395 * * unknown 0x82 -> drop, count as stat3
2397 /* Target for aborts */
2398 nfp_prog
->tgt_abort
= nfp_prog_current_offset(nfp_prog
);
2400 emit_br_relo(nfp_prog
, BR_UNC
, BR_OFF_RELO
, 2, RELO_BR_NEXT_PKT
);
2402 wrp_mov(nfp_prog
, reg_a(0), NFP_BPF_ABI_FLAGS
);
2403 emit_ld_field(nfp_prog
, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF
, 16);
2405 /* Target for normal exits */
2406 nfp_prog
->tgt_out
= nfp_prog_current_offset(nfp_prog
);
2408 /* if R0 > 3 jump to abort */
2409 emit_alu(nfp_prog
, reg_none(), reg_imm(3), ALU_OP_SUB
, reg_b(0));
2410 emit_br(nfp_prog
, BR_BLO
, nfp_prog
->tgt_abort
, 0);
2412 wrp_immed(nfp_prog
, reg_b(2), 0x44112282);
2414 emit_shf(nfp_prog
, reg_a(1),
2415 reg_none(), SHF_OP_NONE
, reg_b(0), SHF_SC_L_SHF
, 3);
2417 emit_alu(nfp_prog
, reg_none(), reg_a(1), ALU_OP_OR
, reg_imm(0));
2418 emit_shf(nfp_prog
, reg_b(2),
2419 reg_imm(0xff), SHF_OP_AND
, reg_b(2), SHF_SC_R_SHF
, 0);
2421 emit_br_relo(nfp_prog
, BR_UNC
, BR_OFF_RELO
, 2, RELO_BR_NEXT_PKT
);
2423 wrp_mov(nfp_prog
, reg_a(0), NFP_BPF_ABI_FLAGS
);
2424 emit_ld_field(nfp_prog
, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF
, 16);
2427 static void nfp_outro(struct nfp_prog
*nfp_prog
)
2429 switch (nfp_prog
->type
) {
2430 case BPF_PROG_TYPE_SCHED_CLS
:
2431 nfp_outro_tc_da(nfp_prog
);
2433 case BPF_PROG_TYPE_XDP
:
2434 nfp_outro_xdp(nfp_prog
);
2441 static int nfp_translate(struct nfp_prog
*nfp_prog
)
2443 struct nfp_insn_meta
*meta
;
2446 nfp_intro(nfp_prog
);
2447 if (nfp_prog
->error
)
2448 return nfp_prog
->error
;
2450 list_for_each_entry(meta
, &nfp_prog
->insns
, l
) {
2451 instr_cb_t cb
= instr_cb
[meta
->insn
.code
];
2453 meta
->off
= nfp_prog_current_offset(nfp_prog
);
2456 nfp_prog
->n_translated
++;
2460 if (nfp_meta_has_prev(nfp_prog
, meta
) &&
2461 nfp_meta_prev(meta
)->double_cb
)
2462 cb
= nfp_meta_prev(meta
)->double_cb
;
2465 err
= cb(nfp_prog
, meta
);
2468 if (nfp_prog
->error
)
2469 return nfp_prog
->error
;
2471 nfp_prog
->n_translated
++;
2474 nfp_prog
->last_bpf_off
= nfp_prog_current_offset(nfp_prog
) - 1;
2476 nfp_outro(nfp_prog
);
2477 if (nfp_prog
->error
)
2478 return nfp_prog
->error
;
2480 wrp_nops(nfp_prog
, NFP_USTORE_PREFETCH_WINDOW
);
2481 if (nfp_prog
->error
)
2482 return nfp_prog
->error
;
2484 return nfp_fixup_branches(nfp_prog
);
2487 /* --- Optimizations --- */
2488 static void nfp_bpf_opt_reg_init(struct nfp_prog
*nfp_prog
)
2490 struct nfp_insn_meta
*meta
;
2492 list_for_each_entry(meta
, &nfp_prog
->insns
, l
) {
2493 struct bpf_insn insn
= meta
->insn
;
2495 /* Programs converted from cBPF start with register xoring */
2496 if (insn
.code
== (BPF_ALU64
| BPF_XOR
| BPF_X
) &&
2497 insn
.src_reg
== insn
.dst_reg
)
2500 /* Programs start with R6 = R1 but we ignore the skb pointer */
2501 if (insn
.code
== (BPF_ALU64
| BPF_MOV
| BPF_X
) &&
2502 insn
.src_reg
== 1 && insn
.dst_reg
== 6)
2505 /* Return as soon as something doesn't match */
2511 /* Remove masking after load since our load guarantees this is not needed */
2512 static void nfp_bpf_opt_ld_mask(struct nfp_prog
*nfp_prog
)
2514 struct nfp_insn_meta
*meta1
, *meta2
;
2515 const s32 exp_mask
[] = {
2516 [BPF_B
] = 0x000000ffU
,
2517 [BPF_H
] = 0x0000ffffU
,
2518 [BPF_W
] = 0xffffffffU
,
2521 nfp_for_each_insn_walk2(nfp_prog
, meta1
, meta2
) {
2522 struct bpf_insn insn
, next
;
2527 if (BPF_CLASS(insn
.code
) != BPF_LD
)
2529 if (BPF_MODE(insn
.code
) != BPF_ABS
&&
2530 BPF_MODE(insn
.code
) != BPF_IND
)
2533 if (next
.code
!= (BPF_ALU64
| BPF_AND
| BPF_K
))
2536 if (!exp_mask
[BPF_SIZE(insn
.code
)])
2538 if (exp_mask
[BPF_SIZE(insn
.code
)] != next
.imm
)
2541 if (next
.src_reg
|| next
.dst_reg
)
2544 if (meta2
->flags
& FLAG_INSN_IS_JUMP_DST
)
2551 static void nfp_bpf_opt_ld_shift(struct nfp_prog
*nfp_prog
)
2553 struct nfp_insn_meta
*meta1
, *meta2
, *meta3
;
2555 nfp_for_each_insn_walk3(nfp_prog
, meta1
, meta2
, meta3
) {
2556 struct bpf_insn insn
, next1
, next2
;
2559 next1
= meta2
->insn
;
2560 next2
= meta3
->insn
;
2562 if (BPF_CLASS(insn
.code
) != BPF_LD
)
2564 if (BPF_MODE(insn
.code
) != BPF_ABS
&&
2565 BPF_MODE(insn
.code
) != BPF_IND
)
2567 if (BPF_SIZE(insn
.code
) != BPF_W
)
2570 if (!(next1
.code
== (BPF_LSH
| BPF_K
| BPF_ALU64
) &&
2571 next2
.code
== (BPF_RSH
| BPF_K
| BPF_ALU64
)) &&
2572 !(next1
.code
== (BPF_RSH
| BPF_K
| BPF_ALU64
) &&
2573 next2
.code
== (BPF_LSH
| BPF_K
| BPF_ALU64
)))
2576 if (next1
.src_reg
|| next1
.dst_reg
||
2577 next2
.src_reg
|| next2
.dst_reg
)
2580 if (next1
.imm
!= 0x20 || next2
.imm
!= 0x20)
2583 if (meta2
->flags
& FLAG_INSN_IS_JUMP_DST
||
2584 meta3
->flags
& FLAG_INSN_IS_JUMP_DST
)
/* load/store pair that forms memory copy should look like the following:
 *
 *   ld_width R, [addr_src + offset_src]
 *   st_width [addr_dest + offset_dest], R
 *
 * The destination register of load and source register of store should
 * be the same, load and store should also perform at the same width.
 * If either of addr_src or addr_dest is stack pointer, we don't do the
 * CPP optimization as stack is modelled by registers on NFP.
 */
2603 curr_pair_is_memcpy(struct nfp_insn_meta
*ld_meta
,
2604 struct nfp_insn_meta
*st_meta
)
2606 struct bpf_insn
*ld
= &ld_meta
->insn
;
2607 struct bpf_insn
*st
= &st_meta
->insn
;
2609 if (!is_mbpf_load(ld_meta
) || !is_mbpf_store(st_meta
))
2612 if (ld_meta
->ptr
.type
!= PTR_TO_PACKET
)
2615 if (st_meta
->ptr
.type
!= PTR_TO_PACKET
)
2618 if (BPF_SIZE(ld
->code
) != BPF_SIZE(st
->code
))
2621 if (ld
->dst_reg
!= st
->src_reg
)
2624 /* There is jump to the store insn in this pair. */
2625 if (st_meta
->flags
& FLAG_INSN_IS_JUMP_DST
)
2631 /* Currently, we only support chaining load/store pairs if:
2633 * - Their address base registers are the same.
2634 * - Their address offsets are in the same order.
2635 * - They operate at the same memory width.
2636 * - There is no jump into the middle of them.
2639 curr_pair_chain_with_previous(struct nfp_insn_meta
*ld_meta
,
2640 struct nfp_insn_meta
*st_meta
,
2641 struct bpf_insn
*prev_ld
,
2642 struct bpf_insn
*prev_st
)
2644 u8 prev_size
, curr_size
, prev_ld_base
, prev_st_base
, prev_ld_dst
;
2645 struct bpf_insn
*ld
= &ld_meta
->insn
;
2646 struct bpf_insn
*st
= &st_meta
->insn
;
2647 s16 prev_ld_off
, prev_st_off
;
2649 /* This pair is the start pair. */
2653 prev_size
= BPF_LDST_BYTES(prev_ld
);
2654 curr_size
= BPF_LDST_BYTES(ld
);
2655 prev_ld_base
= prev_ld
->src_reg
;
2656 prev_st_base
= prev_st
->dst_reg
;
2657 prev_ld_dst
= prev_ld
->dst_reg
;
2658 prev_ld_off
= prev_ld
->off
;
2659 prev_st_off
= prev_st
->off
;
2661 if (ld
->dst_reg
!= prev_ld_dst
)
2664 if (ld
->src_reg
!= prev_ld_base
|| st
->dst_reg
!= prev_st_base
)
2667 if (curr_size
!= prev_size
)
2670 /* There is jump to the head of this pair. */
2671 if (ld_meta
->flags
& FLAG_INSN_IS_JUMP_DST
)
2674 /* Both in ascending order. */
2675 if (prev_ld_off
+ prev_size
== ld
->off
&&
2676 prev_st_off
+ prev_size
== st
->off
)
2679 /* Both in descending order. */
2680 if (ld
->off
+ curr_size
== prev_ld_off
&&
2681 st
->off
+ curr_size
== prev_st_off
)
/* Return TRUE if cross memory access happens. Cross memory access means
 * store area is overlapping with load area that a later load might load
 * the value from a previous store, for this case we can't treat the
 * sequence as a memory copy.
 */
2693 cross_mem_access(struct bpf_insn
*ld
, struct nfp_insn_meta
*head_ld_meta
,
2694 struct nfp_insn_meta
*head_st_meta
)
2696 s16 head_ld_off
, head_st_off
, ld_off
;
2698 /* Different pointer types does not overlap. */
2699 if (head_ld_meta
->ptr
.type
!= head_st_meta
->ptr
.type
)
2702 /* load and store are both PTR_TO_PACKET, check ID info. */
2703 if (head_ld_meta
->ptr
.id
!= head_st_meta
->ptr
.id
)
2706 /* Canonicalize the offsets. Turn all of them against the original
2709 head_ld_off
= head_ld_meta
->insn
.off
+ head_ld_meta
->ptr
.off
;
2710 head_st_off
= head_st_meta
->insn
.off
+ head_st_meta
->ptr
.off
;
2711 ld_off
= ld
->off
+ head_ld_meta
->ptr
.off
;
2713 /* Ascending order cross. */
2714 if (ld_off
> head_ld_off
&&
2715 head_ld_off
< head_st_off
&& ld_off
>= head_st_off
)
2718 /* Descending order cross. */
2719 if (ld_off
< head_ld_off
&&
2720 head_ld_off
> head_st_off
&& ld_off
<= head_st_off
)
/* This pass tries to identify the following instruction sequences.
 *
 *   load R, [regA + offA]
 *   store [regB + offB], R
 *   load R, [regA + offA + const_imm_A]
 *   store [regB + offB + const_imm_A], R
 *   load R, [regA + offA + 2 * const_imm_A]
 *   store [regB + offB + 2 * const_imm_A], R
 *   ...
 *
 * Above sequence is typically generated by compiler when lowering
 * memcpy. NFP prefers using CPP instructions to accelerate it.
 */
2739 static void nfp_bpf_opt_ldst_gather(struct nfp_prog
*nfp_prog
)
2741 struct nfp_insn_meta
*head_ld_meta
= NULL
;
2742 struct nfp_insn_meta
*head_st_meta
= NULL
;
2743 struct nfp_insn_meta
*meta1
, *meta2
;
2744 struct bpf_insn
*prev_ld
= NULL
;
2745 struct bpf_insn
*prev_st
= NULL
;
2748 nfp_for_each_insn_walk2(nfp_prog
, meta1
, meta2
) {
2749 struct bpf_insn
*ld
= &meta1
->insn
;
2750 struct bpf_insn
*st
= &meta2
->insn
;
2752 /* Reset record status if any of the following if true:
2753 * - The current insn pair is not load/store.
2754 * - The load/store pair doesn't chain with previous one.
2755 * - The chained load/store pair crossed with previous pair.
2756 * - The chained load/store pair has a total size of memory
2757 * copy beyond 128 bytes which is the maximum length a
2758 * single NFP CPP command can transfer.
2760 if (!curr_pair_is_memcpy(meta1
, meta2
) ||
2761 !curr_pair_chain_with_previous(meta1
, meta2
, prev_ld
,
2763 (head_ld_meta
&& (cross_mem_access(ld
, head_ld_meta
,
2765 head_ld_meta
->ldst_gather_len
>= 128))) {
2770 s16 prev_ld_off
= prev_ld
->off
;
2771 s16 prev_st_off
= prev_st
->off
;
2772 s16 head_ld_off
= head_ld_meta
->insn
.off
;
2774 if (prev_ld_off
< head_ld_off
) {
2775 head_ld_meta
->insn
.off
= prev_ld_off
;
2776 head_st_meta
->insn
.off
= prev_st_off
;
2777 head_ld_meta
->ldst_gather_len
=
2778 -head_ld_meta
->ldst_gather_len
;
2781 head_ld_meta
->paired_st
= &head_st_meta
->insn
;
2782 head_st_meta
->skip
= true;
2784 head_ld_meta
->ldst_gather_len
= 0;
2787 /* If the chain is ended by an load/store pair then this
2788 * could serve as the new head of the the next chain.
2790 if (curr_pair_is_memcpy(meta1
, meta2
)) {
2791 head_ld_meta
= meta1
;
2792 head_st_meta
= meta2
;
2793 head_ld_meta
->ldst_gather_len
=
2795 meta1
= nfp_meta_next(meta1
);
2796 meta2
= nfp_meta_next(meta2
);
2801 head_ld_meta
= NULL
;
2802 head_st_meta
= NULL
;
2811 if (!head_ld_meta
) {
2812 head_ld_meta
= meta1
;
2813 head_st_meta
= meta2
;
2819 head_ld_meta
->ldst_gather_len
+= BPF_LDST_BYTES(ld
);
2820 meta1
= nfp_meta_next(meta1
);
2821 meta2
= nfp_meta_next(meta2
);
/* Run all instruction-level optimization passes over the program.
 * Always succeeds; the int return keeps the signature uniform with the
 * other translation stages.
 */
static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
	nfp_bpf_opt_reg_init(nfp_prog);

	nfp_bpf_opt_ld_mask(nfp_prog);
	nfp_bpf_opt_ld_shift(nfp_prog);
	nfp_bpf_opt_ldst_gather(nfp_prog);

	return 0;
}
2839 static int nfp_bpf_ustore_calc(u64
*prog
, unsigned int len
)
2841 __le64
*ustore
= (__force __le64
*)prog
;
2844 for (i
= 0; i
< len
; i
++) {
2847 err
= nfp_ustore_check_valid_no_ecc(prog
[i
]);
2851 ustore
[i
] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog
[i
]));
2857 static void nfp_bpf_prog_trim(struct nfp_prog
*nfp_prog
)
2861 prog
= kvmalloc_array(nfp_prog
->prog_len
, sizeof(u64
), GFP_KERNEL
);
2865 nfp_prog
->__prog_alloc_len
= nfp_prog
->prog_len
* sizeof(u64
);
2866 memcpy(prog
, nfp_prog
->prog
, nfp_prog
->__prog_alloc_len
);
2867 kvfree(nfp_prog
->prog
);
2868 nfp_prog
->prog
= prog
;
2871 int nfp_bpf_jit(struct nfp_prog
*nfp_prog
)
2875 ret
= nfp_bpf_optimize(nfp_prog
);
2879 ret
= nfp_translate(nfp_prog
);
2881 pr_err("Translation failed with error %d (translated: %u)\n",
2882 ret
, nfp_prog
->n_translated
);
2886 nfp_bpf_prog_trim(nfp_prog
);
2891 void nfp_bpf_jit_prepare(struct nfp_prog
*nfp_prog
, unsigned int cnt
)
2893 struct nfp_insn_meta
*meta
;
2895 /* Another pass to record jump information. */
2896 list_for_each_entry(meta
, &nfp_prog
->insns
, l
) {
2897 u64 code
= meta
->insn
.code
;
2899 if (BPF_CLASS(code
) == BPF_JMP
&& BPF_OP(code
) != BPF_EXIT
&&
2900 BPF_OP(code
) != BPF_CALL
) {
2901 struct nfp_insn_meta
*dst_meta
;
2902 unsigned short dst_indx
;
2904 dst_indx
= meta
->n
+ 1 + meta
->insn
.off
;
2905 dst_meta
= nfp_bpf_goto_meta(nfp_prog
, meta
, dst_indx
,
2908 meta
->jmp_dst
= dst_meta
;
2909 dst_meta
->flags
|= FLAG_INSN_IS_JUMP_DST
;
2914 bool nfp_bpf_supported_opcode(u8 code
)
2916 return !!instr_cb
[code
];
2919 void *nfp_bpf_relo_for_vnic(struct nfp_prog
*nfp_prog
, struct nfp_bpf_vnic
*bv
)
2925 prog
= kmemdup(nfp_prog
->prog
, nfp_prog
->prog_len
* sizeof(u64
),
2928 return ERR_PTR(-ENOMEM
);
2930 for (i
= 0; i
< nfp_prog
->prog_len
; i
++) {
2931 enum nfp_relo_type special
;
2934 special
= FIELD_GET(OP_RELO_TYPE
, prog
[i
]);
2939 br_add_offset(&prog
[i
], bv
->start_off
);
2941 case RELO_BR_GO_OUT
:
2942 br_set_offset(&prog
[i
],
2943 nfp_prog
->tgt_out
+ bv
->start_off
);
2945 case RELO_BR_GO_ABORT
:
2946 br_set_offset(&prog
[i
],
2947 nfp_prog
->tgt_abort
+ bv
->start_off
);
2949 case RELO_BR_NEXT_PKT
:
2950 br_set_offset(&prog
[i
], bv
->tgt_done
);
2952 case RELO_BR_HELPER
:
2953 val
= br_get_offset(prog
[i
]);
2956 case BPF_FUNC_map_lookup_elem
:
2957 val
= nfp_prog
->bpf
->helpers
.map_lookup
;
2960 pr_err("relocation of unknown helper %d\n",
2965 br_set_offset(&prog
[i
], val
);
2967 case RELO_IMMED_REL
:
2968 immed_add_value(&prog
[i
], bv
->start_off
);
2972 prog
[i
] &= ~OP_RELO_TYPE
;
2975 err
= nfp_bpf_ustore_calc(prog
, nfp_prog
->prog_len
);
2983 return ERR_PTR(err
);