Linux 4.16.11
drivers/net/ethernet/netronome/nfp/bpf/jit.c
/*
 * Copyright (C) 2016-2017 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
 * source tree or the BSD 2-Clause License provided below.  You have the
 * option to license this software under the complete terms of either license.
 *
 * The BSD 2-Clause License:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      1. Redistributions of source code must retain the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer.
 *
 *      2. Redistributions in binary form must reproduce the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer in the documentation and/or other materials
 *         provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#define pr_fmt(fmt)	"NFP net bpf: " fmt

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/pkt_cls.h>
#include <linux/unistd.h>

#include "main.h"
#include "../nfp_asm.h"
/* --- NFP prog --- */
/* The "for each" macros below walk multiple entries at a time and provide
 * pos and next<n> pointers.  It's safe to modify the next pointers (but not
 * pos).
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))

#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l &&				\
	     &(nfp_prog)->insns != &next2->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))
static bool
nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return meta->l.prev != &nfp_prog->insns;
}

static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
{
	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
		pr_warn("instruction limit reached (%u NFP instructions)\n",
			nfp_prog->prog_len);
		nfp_prog->error = -ENOSPC;
		return;
	}

	nfp_prog->prog[nfp_prog->prog_len] = insn;
	nfp_prog->prog_len++;
}
static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
{
	return nfp_prog->prog_len;
}

static bool
nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
{
	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to a translator bug, and the translation
	 * will fail anyway, so just return OK.
	 */
	if (nfp_prog->error)
		return true;
	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
}
/* --- Emitters --- */
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir)
{
	enum cmd_ctx_swap ctx;
	u64 insn;

	if (sync)
		ctx = CMD_CTX_SWAP;
	else
		ctx = CMD_CTX_NO_SWAP;

	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
		FIELD_PREP(OP_CMD_CTX, ctx) |
		FIELD_PREP(OP_CMD_B_SRC, breg) |
		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
		FIELD_PREP(OP_CMD_XFER, xfer) |
		FIELD_PREP(OP_CMD_CNT, size) |
		FIELD_PREP(OP_CMD_SIG, sync) |
		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
		FIELD_PREP(OP_CMD_INDIR, indir) |
		FIELD_PREP(OP_CMD_MODE, mode);

	nfp_prog_push(nfp_prog, insn);
}
static void
emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	     swreg lreg, swreg rreg, u8 size, bool sync, bool indir)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}
	if (reg.swap) {
		pr_err("cmd can't swap arguments\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	if (reg.dst_lmextn || reg.src_lmextn) {
		pr_err("cmd can't use LMextn\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync,
		   indir);
}

static void
emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	 swreg lreg, swreg rreg, u8 size, bool sync)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false);
}

static void
emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	       swreg lreg, swreg rreg, u8 size, bool sync)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true);
}
static void
__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
	  enum br_ctx_signal_state css, u16 addr, u8 defer)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BASE |
		FIELD_PREP(OP_BR_MASK, mask) |
		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
		FIELD_PREP(OP_BR_CSS, css) |
		FIELD_PREP(OP_BR_DEFBR, defer) |
		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);

	nfp_prog_push(nfp_prog, insn);
}
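/* Branch targets are split across the ADDR_LO/ADDR_HI fields; ADDR_HI ends up
 * a single "target lies outside the LO window" bit.  The _relo variant below
 * additionally stashes a relocation type in the emitted word, presumably so a
 * later relocation pass can rewrite the final target.
 */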
static void
emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
	     enum nfp_relo_type relo)
{
	if (mask == BR_UNC && defer > 2) {
		pr_err("BUG: branch defer out of bounds %d\n", defer);
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_br(nfp_prog, mask,
		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
		  BR_CSS_NONE, addr, defer);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

static void
emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
{
	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
}
static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
	     enum immed_width width, bool invert,
	     enum immed_shift shift, bool wr_both,
	     bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_IMMED_BASE |
		FIELD_PREP(OP_IMMED_A_SRC, areg) |
		FIELD_PREP(OP_IMMED_B_SRC, breg) |
		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
		FIELD_PREP(OP_IMMED_WIDTH, width) |
		FIELD_PREP(OP_IMMED_INV, invert) |
		FIELD_PREP(OP_IMMED_SHIFT, shift) |
		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
static void
emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
	   enum immed_width width, bool invert, enum immed_shift shift)
{
	struct nfp_insn_ur_regs reg;
	int err;

	if (swreg_type(dst) == NN_REG_IMM) {
		nfp_prog->error = -EFAULT;
		return;
	}

	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	/* Use reg.dst when destination is No-Dest. */
	__emit_immed(nfp_prog,
		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
		     reg.breg, imm >> 8, width, invert, shift,
		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}
static void
__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   enum shf_sc sc, u8 shift,
	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
		nfp_prog->error = -EFAULT;
		return;
	}

	if (sc == SHF_SC_L_SHF)
		shift = 32 - shift;

	insn = OP_SHF_BASE |
		FIELD_PREP(OP_SHF_A_SRC, areg) |
		FIELD_PREP(OP_SHF_SC, sc) |
		FIELD_PREP(OP_SHF_B_SRC, breg) |
		FIELD_PREP(OP_SHF_I8, i8) |
		FIELD_PREP(OP_SHF_SW, sw) |
		FIELD_PREP(OP_SHF_DST, dst) |
		FIELD_PREP(OP_SHF_SHIFT, shift) |
		FIELD_PREP(OP_SHF_OP, op) |
		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
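/* Note the rewrite above: the shifter appears to take only right-shift
 * amounts, so a left shift by N is encoded as SHF_SC_L_SHF with 32 - N.
 */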
static void
emit_shf(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}
static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_ALU_BASE |
		FIELD_PREP(OP_ALU_A_SRC, areg) |
		FIELD_PREP(OP_ALU_B_SRC, breg) |
		FIELD_PREP(OP_ALU_DST, dst) |
		FIELD_PREP(OP_ALU_SW, swap) |
		FIELD_PREP(OP_ALU_OP, op) |
		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
static void
emit_alu(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum alu_op op, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	int err;

	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}
static void
__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
		bool zero, bool swap, bool wr_both,
		bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LDF_BASE |
		FIELD_PREP(OP_LDF_A_SRC, areg) |
		FIELD_PREP(OP_LDF_SC, sc) |
		FIELD_PREP(OP_LDF_B_SRC, breg) |
		FIELD_PREP(OP_LDF_I8, imm8) |
		FIELD_PREP(OP_LDF_SW, swap) |
		FIELD_PREP(OP_LDF_ZF, zero) |
		FIELD_PREP(OP_LDF_BMASK, bmask) |
		FIELD_PREP(OP_LDF_SHF, shift) |
		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
static void
emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
		  enum shf_sc sc, u8 shift, bool zero)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* Note: ld_field is special as it uses one of the src regs as dst */
	err = swreg_to_restricted(dst, dst, src, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
			reg.i8, zero, reg.swap, reg.wr_both,
			reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
	      enum shf_sc sc, u8 shift)
{
	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
}
static void
__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
	    bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LCSR_BASE |
		FIELD_PREP(OP_LCSR_A_SRC, areg) |
		FIELD_PREP(OP_LCSR_B_SRC, breg) |
		FIELD_PREP(OP_LCSR_WRITE, wr) |
		FIELD_PREP(OP_LCSR_ADDR, addr) |
		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
{
	struct nfp_insn_ur_regs reg;
	int err;

	/* This instruction takes immeds instead of reg_none() for the ignored
	 * operand, but we can't encode 2 immeds in one instr with our normal
	 * swreg infra so if param is an immed, we encode as reg_none() and
	 * copy the immed to both operands.
	 */
	if (swreg_type(src) == NN_REG_IMM) {
		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
		reg.breg = reg.areg;
	} else {
		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
	}
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
		    false, reg.src_lmextn);
}
static void emit_nop(struct nfp_prog *nfp_prog)
{
	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
}
/* --- Wrappers --- */
static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
{
	if (!(imm & 0xffff0000)) {
		*val = imm;
		*shift = IMMED_SHIFT_0B;
	} else if (!(imm & 0xff0000ff)) {
		*val = imm >> 8;
		*shift = IMMED_SHIFT_1B;
	} else if (!(imm & 0x0000ffff)) {
		*val = imm >> 16;
		*shift = IMMED_SHIFT_2B;
	} else {
		return false;
	}

	return true;
}
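/* Example: pack_immed(0x00abcd00, ...) matches the second case (no bits set
 * in 0xff0000ff), yielding *val = 0xabcd and *shift = IMMED_SHIFT_1B, i.e. a
 * 16-bit value placed one byte up; 0x12340001 matches no case and returns
 * false.
 */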
static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
{
	enum immed_shift shift;
	u16 val;

	if (pack_immed(imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
	} else if (pack_immed(~imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
	} else {
		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
			   false, IMMED_SHIFT_0B);
		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
			   false, IMMED_SHIFT_2B);
	}
}
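/* Loading a 32-bit constant thus costs one instruction when the value (or its
 * complement) fits a shifted 16-bit immediate, and two instructions (low
 * halfword, then high halfword shifted 2 bytes) otherwise.
 */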
static void
wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
	       enum nfp_relo_type relo)
{
	if (imm > 0xffff) {
		pr_err("relocation of a large immediate!\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}
/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If @imm is small enough, encode it directly in the operand and return,
 * otherwise load @imm to a spare register and return its encoding.
 */
static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}

/* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If @imm is small enough, encode it directly in the operand and return,
 * otherwise load @imm to a spare register and return its encoding.
 */
static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}
static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
{
	while (count--)
		emit_nop(nfp_prog);
}

static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
{
	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
}

static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
{
	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}
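/* Throughout the JIT a 64-bit eBPF register N is modelled as a pair of 32-bit
 * NFP GPRs: N * 2 holds the low word and N * 2 + 1 the high word.  32-bit ALU
 * callbacks therefore write the low GPR and explicitly zero the high one.
 */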
/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
 * result to @dst from low end.
 */
static void
wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
		u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
	u8 mask = (1 << field_len) - 1;

	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}
static void
addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
	      swreg *rega, swreg *regb)
{
	if (offset == reg_imm(0)) {
		*rega = reg_a(src_gpr);
		*regb = reg_b(src_gpr + 1);
		return;
	}

	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
		 reg_imm(0));
	*rega = imm_a(nfp_prog);
	*regb = imm_b(nfp_prog);
}
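/* A 40-bit address lives in a GPR pair: low 32 bits in src_gpr, high bits in
 * src_gpr + 1.  Adding an offset is a 32-bit add followed by an
 * add-with-carry of 0 into the high word.
 */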
/* NFP has a Command Push Pull bus which supports bulk memory operations. */
static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	bool descending_seq = meta->ldst_gather_len < 0;
	s16 len = abs(meta->ldst_gather_len);
	swreg src_base, off;
	bool src_40bit_addr;
	unsigned int i;
	u8 xfer_num;

	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
	src_base = reg_a(meta->insn.src_reg * 2);
	xfer_num = round_up(len, 4) / 4;

	if (src_40bit_addr)
		addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
			      &off);

	/* Setup PREV_ALU fields to override memory read length. */
	if (len > 32)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Memory read from source addr into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
		     src_base, off, xfer_num - 1, true, len > 32);

	/* Move from transfer-in to transfer-out. */
	for (i = 0; i < xfer_num; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));

	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));

	if (len <= 8) {
		/* Use single direct_ref write8. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
			 true);
	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
		/* Use single direct_ref write32. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
			 true);
	} else if (len <= 32) {
		/* Use single indirect_ref write8. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       len - 1, true);
	} else if (IS_ALIGNED(len, 4)) {
		/* Use single indirect_ref write32. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 1, true);
	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32 bytes, then
		 * another direct_ref write8 to write the remaining bytes.
		 */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
			 true);

		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
				      imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
			 true);
	} else {
		/* Use one indirect_ref write32 to write a 4-byte aligned
		 * length, then another direct_ref write8 to write the
		 * remaining bytes.
		 */
		u8 new_off;

		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 2, true);
		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
			 (len & 0x3) - 1, true);
	}

	/* TODO: The following extra load is to make sure data flow is
	 * identical before and after we do memory copy optimization.
	 *
	 * The load destination register is not guaranteed to be dead, so we
	 * need to make sure it is loaded with the same value as before
	 * this transformation.
	 *
	 * These extra loads could be removed once we have accurate register
	 * usage information.
	 */
	if (descending_seq)
		xfer_num = 0;
	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
		xfer_num = xfer_num - 1;
	else
		xfer_num = xfer_num - 2;

	switch (BPF_SIZE(meta->insn.code)) {
	case BPF_B:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 1,
				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
		break;
	case BPF_H:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
		break;
	case BPF_W:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(0));
		break;
	case BPF_DW:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(xfer_num));
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
			reg_xfer(xfer_num + 1));
		break;
	}

	if (BPF_SIZE(meta->insn.code) != BPF_DW)
		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
static int
data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
{
	unsigned int i;
	u16 shift, sz;

	/* We load the value from the address indicated in @offset and then
	 * shift out the data we don't need.  Note: this is big endian!
	 */
	sz = max(size, 4);
	shift = size < 4 ? 4 - size : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
		 pptr_reg(nfp_prog), offset, sz - 1, true);

	i = 0;
	if (shift)
		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}
static int
data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
{
	unsigned int i;
	u8 mask, sz;

	/* We load the value from the address indicated in rreg + lreg and then
	 * mask out the data we don't need.  Note: this is little endian!
	 */
	sz = max(size, 4);
	mask = size < 4 ? GENMASK(size - 1, 0) : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
		 lreg, rreg, sz / 4 - 1, true);

	i = 0;
	if (mask)
		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
				  reg_xfer(0), SHF_SC_NONE, 0, true);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}
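/* For sub-word loads the ld_field byte mask GENMASK(size - 1, 0) keeps only
 * the low @size bytes of the transfer register; full words are moved whole.
 */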
static int
data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
				  size, CMD_MODE_32b);
}

static int
data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	swreg rega, regb;

	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);

	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
				  size, CMD_MODE_40b_BA);
}
static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
{
	swreg tmp_reg;

	/* Calculate the true offset (src_reg + imm) */
	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);

	/* Check packet length (size guaranteed to fit b/c it's u8) */
	emit_alu(nfp_prog, imm_a(nfp_prog),
		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
	emit_alu(nfp_prog, reg_none(),
		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
}

static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
{
	swreg tmp_reg;

	/* Check packet length */
	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	return data_ld(nfp_prog, tmp_reg, 0, size);
}
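/* Both constructors follow the same bounds-check pattern: compute
 * end = offset + size, subtract it from the packet length, and branch to the
 * abort handler (RELO_BR_GO_ABORT) on borrow, i.e. when the access would run
 * past the packet.
 */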
static int
data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		    u8 src_gpr, u8 size)
{
	unsigned int i;

	for (i = 0; i * 4 < size; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, true);

	return 0;
}

static int
data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		   u64 imm, u8 size)
{
	wrp_immed(nfp_prog, reg_xfer(0), imm);
	if (size == 8)
		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, true);

	return 0;
}
typedef int
(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	     bool needs_inc);
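/* An lmem_step callback moves one slice (at most 4 bytes, never crossing a
 * 4-byte word boundary) between a GPR and local memory.  The first/last/
 * new_gpr flags describe where the slice falls within the whole access so the
 * callback can decide when read-modify-write or LM pointer increments are
 * needed.
 */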
static int
wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	      bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, src_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog, reg_both(dst),
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	src_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new GPR
		 * that means we are loading a second part of the LMEM word into
		 * a new GPR.  IOW we've already looked at that LMEM word and
		 * therefore it has been loaded into imm_a().
		 */
		if (first || !new_gpr)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);

	if (should_inc)
		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));

	return 0;
}
static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	       bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, dst_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog,
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
			reg_b(src));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	dst_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* Only first and last LMEM locations are going to need RMW,
		 * the middle location will be overwritten fully.
		 */
		if (first || last)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);

	if (new_gpr || last) {
		if (idx > RE_REG_LM_IDX_MAX)
			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
		if (should_inc)
			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
	}

	return 0;
}
static int
mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
	     bool clr_gpr, lmem_step step)
{
	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
	bool first = true, last;
	bool needs_inc = false;
	swreg stack_off_reg;
	u8 prev_gpr = 255;
	u32 gpr_byte = 0;
	bool lm3 = true;
	int ret;

	if (meta->ptr_not_const) {
		/* Use of the last encountered ptr_off is OK, they all have
		 * the same alignment.  Depend on low bits of value being
		 * discarded when written to LMaddr register.
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	} else if (off + size <= 64) {
		/* We can reach bottom 64B with LMaddr0 */
		lm3 = false;
	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
		/* We have to set up a new pointer.  If we know the offset
		 * and the entire access falls into a single 32 byte aligned
		 * window we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because offset is ORed in
		 * not added when doing *l$indexN[off].
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
						stack_imm(nfp_prog));
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		off %= 32;
	} else {
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	}
	if (lm3) {
		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 4 one slot will be filled by zeroing of upper. */
		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
	}

	if (clr_gpr && size < 8)
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);

	while (size) {
		u32 slice_end;
		u8 slice_size;

		slice_size = min(size, 4 - gpr_byte);
		slice_end = min(off + slice_size, round_up(off + 1, 4));
		slice_size = slice_end - off;

		last = slice_size == size;

		if (needs_inc)
			off %= 4;

		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
			   first, gpr != prev_gpr, last, lm3, needs_inc);
		if (ret)
			return ret;

		prev_gpr = gpr;
		first = false;

		gpr_byte += slice_size;
		if (gpr_byte >= 4) {
			gpr_byte -= 4;
			gpr++;
		}

		size -= slice_size;
		off += slice_size;
	}

	return 0;
}
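/* A worked example of the slicing loop above: an 8-byte access at LM offset 6
 * proceeds in four 2-byte slices (6..7, 8..9, 10..11, 12..13), because no
 * slice may cross a 4-byte LM word boundary or a 4-byte GPR boundary, and
 * here the two grids are offset by two bytes against each other.
 */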
static void
wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
{
	swreg tmp_reg;

	if (alu_op == ALU_OP_AND) {
		if (!imm)
			wrp_immed(nfp_prog, reg_both(dst), 0);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_OR) {
		if (!~imm)
			wrp_immed(nfp_prog, reg_both(dst), ~0U);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_XOR) {
		if (!~imm)
			emit_alu(nfp_prog, reg_both(dst), reg_none(),
				 ALU_OP_NOT, reg_b(dst));
		if (!imm || !~imm)
			return;
	}

	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
}
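/* The special cases above fold identity and absorbing immediates: AND 0
 * becomes a plain zeroing, AND ~0 / OR 0 / XOR 0 emit nothing, OR ~0 loads
 * all-ones and XOR ~0 turns into a NOT.
 */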
static int
wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op, bool skip)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	if (skip) {
		meta->skip = true;
		return 0;
	}

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);

	return 0;
}

static int
wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	emit_alu(nfp_prog, reg_both(dst + 1),
		 reg_a(dst + 1), alu_op, reg_b(src + 1));

	return 0;
}

static int
wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op, bool skip)
{
	const struct bpf_insn *insn = &meta->insn;

	if (skip) {
		meta->skip = true;
		return 0;
	}

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int
wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
static void
wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
		 enum br_mask br_mask, u16 off)
{
	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
	emit_br(nfp_prog, br_mask, off, 0);
}

static int
wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     enum alu_op alu_op, enum br_mask br_mask)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
			 insn->src_reg * 2, br_mask, insn->off);
	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
			 insn->src_reg * 2 + 1, br_mask, insn->off);

	return 0;
}
static int
wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	    enum br_mask br_mask, bool swap)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	u8 reg = insn->dst_reg * 2;
	swreg tmp_reg;

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	if (!swap)
		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));

	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
	if (!swap)
		emit_alu(nfp_prog, reg_none(),
			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(),
			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));

	emit_br(nfp_prog, br_mask, insn->off, 0);

	return 0;
}
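/* 64-bit compares are a subtract of the low words followed by a
 * subtract-with-borrow of the high words; only the resulting condition codes
 * are consumed by the branch, the numeric results are discarded into
 * reg_none().
 */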
static int
wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	    enum br_mask br_mask, bool swap)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 areg, breg;

	areg = insn->dst_reg * 2;
	breg = insn->src_reg * 2;

	if (swap) {
		areg ^= breg;
		breg ^= areg;
		areg ^= breg;
	}

	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
	emit_br(nfp_prog, br_mask, insn->off, 0);

	return 0;
}
static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
{
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
		      SHF_SC_R_ROT, 8);
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
		      SHF_SC_R_ROT, 16);
}
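/* The two masked rotates implement a 32-bit byte swap: rotate right 8 with
 * byte mask 0xf moves every byte one position down, then rotate right 16 with
 * mask 0x5 fixes up bytes 0 and 2, leaving gpr_out = bswap32(reg_in).
 */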
static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
	struct nfp_bpf_cap_adjust_head *adjust_head;
	u32 ret_einval, end;

	adjust_head = &nfp_prog->bpf->adjust_head;

	/* Optimized version - 5 vs 14 cycles */
	if (nfp_prog->adjust_head_location != UINT_MAX) {
		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
			return -EINVAL;

		emit_alu(nfp_prog, pptr_reg(nfp_prog),
			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
		emit_alu(nfp_prog, plen_reg(nfp_prog),
			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
		emit_alu(nfp_prog, pv_len(nfp_prog),
			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

		wrp_immed(nfp_prog, reg_both(0), 0);
		wrp_immed(nfp_prog, reg_both(1), 0);

		/* TODO: when adjust head is guaranteed to succeed we can
		 * also eliminate the following if (r0 == 0) branch.
		 */

		return 0;
	}

	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
	end = ret_einval + 2;

	/* We need to use a temp because offset is just a part of the pkt ptr */
	emit_alu(nfp_prog, tmp,
		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));

	/* Validate result will fit within FW datapath constraints */
	emit_alu(nfp_prog, reg_none(),
		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
	emit_alu(nfp_prog, reg_none(),
		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);

	/* Validate the length is at least ETH_HLEN */
	emit_alu(nfp_prog, tmp_len,
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, reg_none(),
		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);

	/* Load the ret code */
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	/* Modify the packet metadata */
	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);

	/* Skip over the -EINVAL ret code (defer 2) */
	emit_br(nfp_prog, BR_UNC, end, 2);

	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

	/* return -EINVAL target */
	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	wrp_immed(nfp_prog, reg_both(0), -22);
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
		return -EINVAL;

	return 0;
}
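/* BPF r0 is GPR pair 0/1 under the reg * 2 mapping, so the wrp_immed() calls
 * on reg_both(0)/reg_both(1) above set the 64-bit return value; -22 on the
 * error path is -EINVAL.
 */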
static int
map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	struct bpf_offloaded_map *offmap;
	struct nfp_bpf_map *nfp_map;
	bool load_lm_ptr;
	u32 ret_tgt;
	s64 lm_off;
	swreg tid;

	offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
	nfp_map = offmap->dev_priv;

	/* We only have to reload LM0 if the key is not at start of stack */
	lm_off = nfp_prog->stack_depth;
	lm_off += meta->arg2.var_off.value + meta->arg2.off;
	load_lm_ptr = meta->arg2_var_off || lm_off;

	/* Set LM0 to start of key */
	if (load_lm_ptr)
		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);

	/* Load map ID into a register, it should actually fit as an immediate
	 * but in case it doesn't deal with it here, not in the delay slots.
	 */
	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem,
		     2, RELO_BR_HELPER);
	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
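	/* The two loads below sit in the branch's defer slots, so they still
	 * execute before control reaches the helper.
	 */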
	/* Load map ID into A0 */
	wrp_mov(nfp_prog, reg_a(0), tid);

	/* Load the return address into B0 */
	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
		return -EINVAL;

	/* Reset the LM0 pointer */
	if (!load_lm_ptr)
		return 0;

	emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
	wrp_nops(nfp_prog, 3);

	return 0;
}
/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;
	u8 src = insn->src_reg * 2;

	if (insn->src_reg == BPF_REG_10) {
		swreg stack_depth_reg;

		stack_depth_reg = ur_load_imm_any(nfp_prog,
						  nfp_prog->stack_depth,
						  stack_imm(nfp_prog));
		emit_alu(nfp_prog, reg_both(dst),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else {
		wrp_reg_mov(nfp_prog, dst, src);
		wrp_reg_mov(nfp_prog, dst + 1, src + 1);
	}

	return 0;
}
static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u64 imm = meta->insn.imm; /* sign extend */

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);

	return 0;
}

static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
}

static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
}

static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
}
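/* The final argument above is the "skip" flag: XOR/OR with 0 and AND with
 * all-ones leave the register untouched, so wrp_alu64_imm() marks such
 * instructions as skipped instead of emitting code.
 */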
static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
		 reg_b(insn->src_reg * 2 + 1));

	return 0;
}

static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);

	return 0;
}

static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
		 reg_b(insn->src_reg * 2 + 1));

	return 0;
}

static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);

	return 0;
}

static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
		 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
		 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));

	return 0;
}
static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;

	if (insn->imm < 32) {
		emit_shf(nfp_prog, reg_both(dst + 1),
			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
			 SHF_SC_R_DSHF, 32 - insn->imm);
		emit_shf(nfp_prog, reg_both(dst),
			 reg_none(), SHF_OP_NONE, reg_b(dst),
			 SHF_SC_L_SHF, insn->imm);
	} else if (insn->imm == 32) {
		wrp_reg_mov(nfp_prog, dst + 1, dst);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	} else if (insn->imm > 32) {
		emit_shf(nfp_prog, reg_both(dst + 1),
			 reg_none(), SHF_OP_NONE, reg_b(dst),
			 SHF_SC_L_SHF, insn->imm - 32);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	}

	return 0;
}
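/* For shifts under 32 the double shift (SHF_SC_R_DSHF) reads the 64-bit
 * concatenation of both GPRs, so bits shifted out of one word slide into the
 * other; shifts of exactly 32 degenerate to a register move, and larger
 * shifts to a single 32-bit shift plus zeroing.  The same scheme is used for
 * the 64-bit right shift below.
 */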
static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 dst = insn->dst_reg * 2;

	if (insn->imm < 32) {
		emit_shf(nfp_prog, reg_both(dst),
			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
			 SHF_SC_R_DSHF, insn->imm);
		emit_shf(nfp_prog, reg_both(dst + 1),
			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
			 SHF_SC_R_SHF, insn->imm);
	} else if (insn->imm == 32) {
		wrp_reg_mov(nfp_prog, dst, dst + 1);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else if (insn->imm > 32) {
		emit_shf(nfp_prog, reg_both(dst),
			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
			 SHF_SC_R_SHF, insn->imm - 32);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	}

	return 0;
}
static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
}

static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
}

static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
}

static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
}

static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
}

static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
}

static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
}

static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u8 dst = meta->insn.dst_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	if (!insn->imm)
		return 1; /* TODO: zero shift means indirect */

	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
		 SHF_SC_L_SHF, insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}
static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 gpr = insn->dst_reg * 2;

	switch (insn->imm) {
	case 16:
		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
			      SHF_SC_R_ROT, 8);
		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
			      SHF_SC_R_SHF, 16);

		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
		break;
	case 32:
		wrp_end32(nfp_prog, reg_a(gpr), gpr);
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
		break;
	case 64:
		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));

		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
		break;
	}

	return 0;
}
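/* end_reg32() implements BPF_END byte swaps: 16-bit swaps the low halfword
 * and zero-extends, 32-bit is wrp_end32() on the low GPR, and 64-bit swaps
 * each word via wrp_end32() while exchanging the two GPRs through imm_a().
 */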
static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
	u32 imm_lo, imm_hi;
	u8 dst;

	dst = prev->insn.dst_reg * 2;
	imm_lo = prev->insn.imm;
	imm_hi = meta->insn.imm;

	wrp_immed(nfp_prog, reg_both(dst), imm_lo);

	/* mov is always 1 insn, load imm may be two, so try to use mov */
	if (imm_hi == imm_lo)
		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
	else
		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);

	return 0;
}

static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	meta->double_cb = imm_ld8_part2;
	return 0;
}
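/* A 64-bit immediate load spans two BPF instructions; imm_ld8() defers the
 * actual emission to imm_ld8_part2() via ->double_cb so both halves of the
 * immediate are visible when code is generated.
 */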
static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
}

static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
}

static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
}

static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 1);
}

static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 2);
}

static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 4);
}

static int
mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      unsigned int size, unsigned int ptr_off)
{
	return mem_op_stack(nfp_prog, meta, size, ptr_off,
			    meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
			    true, wrp_lmem_load);
}
static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		       u8 size)
{
	swreg dst = reg_both(meta->insn.dst_reg * 2);

	switch (meta->insn.off) {
	case offsetof(struct __sk_buff, len):
		if (size != FIELD_SIZEOF(struct __sk_buff, len))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
		break;
	case offsetof(struct __sk_buff, data):
		if (size != FIELD_SIZEOF(struct __sk_buff, data))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
		break;
	case offsetof(struct __sk_buff, data_end):
		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
			return -EOPNOTSUPP;
		emit_alu(nfp_prog, dst,
			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
		break;
	default:
		return -EOPNOTSUPP;
	}

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}

static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		       u8 size)
{
	swreg dst = reg_both(meta->insn.dst_reg * 2);

	switch (meta->insn.off) {
	case offsetof(struct xdp_md, data):
		if (size != FIELD_SIZEOF(struct xdp_md, data))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
		break;
	case offsetof(struct xdp_md, data_end):
		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
			return -EOPNOTSUPP;
		emit_alu(nfp_prog, dst,
			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
		break;
	default:
		return -EOPNOTSUPP;
	}

	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
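/* Context reads (struct __sk_buff / struct xdp_md) never touch memory: len
 * and data map straight onto the datapath's packet-length and packet-pointer
 * registers, and data_end is recomputed as their sum.
 */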
static int
mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size)
{
	swreg tmp_reg;

	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
					 tmp_reg, meta->insn.dst_reg * 2, size);
}

static int
mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size)
{
	swreg tmp_reg;

	tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
					 tmp_reg, meta->insn.dst_reg * 2, size);
}

static int
mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	unsigned int size)
{
	if (meta->ldst_gather_len)
		return nfp_cpp_memcpy(nfp_prog, meta);

	if (meta->ptr.type == PTR_TO_CTX) {
		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
			return mem_ldx_xdp(nfp_prog, meta, size);
		else
			return mem_ldx_skb(nfp_prog, meta, size);
	}

	if (meta->ptr.type == PTR_TO_PACKET)
		return mem_ldx_data(nfp_prog, meta, size);

	if (meta->ptr.type == PTR_TO_STACK)
		return mem_ldx_stack(nfp_prog, meta, size,
				     meta->ptr.off + meta->ptr.var_off.value);

	if (meta->ptr.type == PTR_TO_MAP_VALUE)
		return mem_ldx_emem(nfp_prog, meta, size);

	return -EOPNOTSUPP;
}

static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 1);
}

static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 2);
}

static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 4);
}

static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 8);
}
static int
mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	    unsigned int size)
{
	u64 imm = meta->insn.imm; /* sign extend */
	swreg off_reg;

	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
				  imm, size);
}

static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		  unsigned int size)
{
	if (meta->ptr.type == PTR_TO_PACKET)
		return mem_st_data(nfp_prog, meta, size);

	return -EOPNOTSUPP;
}

static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 1);
}

static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 2);
}

static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 4);
}

static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 8);
}

static int
mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size)
{
	swreg off_reg;

	off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
				   meta->insn.src_reg * 2, size);
}

static int
mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      unsigned int size, unsigned int ptr_off)
{
	return mem_op_stack(nfp_prog, meta, size, ptr_off,
			    meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
			    false, wrp_lmem_store);
}

static int
mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	unsigned int size)
{
	if (meta->ptr.type == PTR_TO_PACKET)
		return mem_stx_data(nfp_prog, meta, size);

	if (meta->ptr.type == PTR_TO_STACK)
		return mem_stx_stack(nfp_prog, meta, size,
				     meta->ptr.off + meta->ptr.var_off.value);

	return -EOPNOTSUPP;
}

static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 1);
}

static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 2);
}

static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 4);
}

static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 8);
}
1987 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1989 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
1991 return 0;
1994 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1996 const struct bpf_insn *insn = &meta->insn;
1997 u64 imm = insn->imm; /* sign extend */
1998 swreg or1, or2, tmp_reg;
2000 or1 = reg_a(insn->dst_reg * 2);
2001 or2 = reg_b(insn->dst_reg * 2 + 1);
2003 if (imm & ~0U) {
2004 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2005 emit_alu(nfp_prog, imm_a(nfp_prog),
2006 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2007 or1 = imm_a(nfp_prog);
2010 if (imm >> 32) {
2011 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2012 emit_alu(nfp_prog, imm_b(nfp_prog),
2013 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
2014 or2 = imm_b(nfp_prog);
2017 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
2018 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
2020 return 0;
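/* The inequality compares below share wrp_cmp_imm(): the branch code
 * picks the condition (BR_BLO/BR_BHS unsigned, BR_BLT/BR_BGE signed)
 * and the bool selects swapped operands, which in effect turns e.g.
 * "dst > imm" into "imm < dst".
 */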
2023 static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2025 return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
2028 static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2030 return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
2033 static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2035 return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
2038 static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2040 return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
2043 static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2045 return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true);
2048 static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2050 return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false);
2053 static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2055 return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false);
2058 static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2060 return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true);
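/* JSET against an immediate: AND each 32-bit half of dst with the
 * matching half of the immediate and branch if any bit is set.  A
 * zero immediate can never match, so the insn is skipped outright.
 */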
2063 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2065 const struct bpf_insn *insn = &meta->insn;
2066 u64 imm = insn->imm; /* sign extend */
2067 swreg tmp_reg;
2069 if (!imm) {
2070 meta->skip = true;
2071 return 0;
2074 if (imm & ~0U) {
2075 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2076 emit_alu(nfp_prog, reg_none(),
2077 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
2078 emit_br(nfp_prog, BR_BNE, insn->off, 0);
2081 if (imm >> 32) {
2082 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2083 emit_alu(nfp_prog, reg_none(),
2084 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
2085 emit_br(nfp_prog, BR_BNE, insn->off, 0);
2088 return 0;
2091 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2093 const struct bpf_insn *insn = &meta->insn;
2094 u64 imm = insn->imm; /* sign extend */
2095 swreg tmp_reg;
2097 if (!imm) {
2098 emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
2099 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
2100 emit_br(nfp_prog, BR_BNE, insn->off, 0);
2101 return 0;
2104 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2105 emit_alu(nfp_prog, reg_none(),
2106 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2107 emit_br(nfp_prog, BR_BNE, insn->off, 0);
2109 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
2110 emit_alu(nfp_prog, reg_none(),
2111 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
2112 emit_br(nfp_prog, BR_BNE, insn->off, 0);
2114 return 0;
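/* Register-register equality mirrors jeq_imm(): XOR the low and high
 * halves pairwise, OR the two results to set the flags and branch on
 * "equal".
 */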
2117 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2119 const struct bpf_insn *insn = &meta->insn;
2121 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
2122 ALU_OP_XOR, reg_b(insn->src_reg * 2));
2123 emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
2124 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
2125 emit_alu(nfp_prog, reg_none(),
2126 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
2127 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
2129 return 0;
2132 static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2134 return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
2137 static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2139 return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
2142 static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2144 return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
2147 static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2149 return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
2152 static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2154 return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true);
2157 static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2159 return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false);
2162 static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2164 return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false);
2167 static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2169 return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true);
2172 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2174 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
2177 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2179 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
2182 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2184 switch (meta->insn.imm) {
2185 case BPF_FUNC_xdp_adjust_head:
2186 return adjust_head(nfp_prog, meta);
2187 case BPF_FUNC_map_lookup_elem:
2188 return map_lookup_stack(nfp_prog, meta);
2189 default:
2190 WARN_ONCE(1, "verifier allowed unsupported function\n");
2191 return -EOPNOTSUPP;
2195 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2197 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
2199 return 0;
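/* Dispatch table for the translator, indexed by the raw BPF opcode
 * byte.  Unsupported opcodes stay NULL: nfp_translate() returns
 * -ENOENT for them and nfp_bpf_supported_opcode() reports them as
 * unsupported.
 */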
2202 static const instr_cb_t instr_cb[256] = {
2203 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64,
2204 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64,
2205 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64,
2206 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64,
2207 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64,
2208 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64,
2209 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64,
2210 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64,
2211 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64,
2212 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
2213 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
2214 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
2215 [BPF_ALU64 | BPF_NEG] = neg_reg64,
2216 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
2217 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
2218 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
2219 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
2220 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
2221 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm,
2222 [BPF_ALU | BPF_AND | BPF_X] = and_reg,
2223 [BPF_ALU | BPF_AND | BPF_K] = and_imm,
2224 [BPF_ALU | BPF_OR | BPF_X] = or_reg,
2225 [BPF_ALU | BPF_OR | BPF_K] = or_imm,
2226 [BPF_ALU | BPF_ADD | BPF_X] = add_reg,
2227 [BPF_ALU | BPF_ADD | BPF_K] = add_imm,
2228 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg,
2229 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm,
2230 [BPF_ALU | BPF_NEG] = neg_reg,
2231 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
2232 [BPF_ALU | BPF_END | BPF_X] = end_reg32,
2233 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
2234 [BPF_LD | BPF_ABS | BPF_B] = data_ld1,
2235 [BPF_LD | BPF_ABS | BPF_H] = data_ld2,
2236 [BPF_LD | BPF_ABS | BPF_W] = data_ld4,
2237 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1,
2238 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2,
2239 [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4,
2240 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1,
2241 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2,
2242 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4,
2243 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8,
2244 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1,
2245 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2,
2246 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4,
2247 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8,
2248 [BPF_ST | BPF_MEM | BPF_B] = mem_st1,
2249 [BPF_ST | BPF_MEM | BPF_H] = mem_st2,
2250 [BPF_ST | BPF_MEM | BPF_W] = mem_st4,
2251 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
2252 [BPF_JMP | BPF_JA | BPF_K] = jump,
2253 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
2254 [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm,
2255 [BPF_JMP | BPF_JGE | BPF_K] = jge_imm,
2256 [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm,
2257 [BPF_JMP | BPF_JLE | BPF_K] = jle_imm,
2258 [BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm,
2259 [BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm,
2260 [BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm,
2261 [BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm,
2262 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
2263 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
2264 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
2265 [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg,
2266 [BPF_JMP | BPF_JGE | BPF_X] = jge_reg,
2267 [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg,
2268 [BPF_JMP | BPF_JLE | BPF_X] = jle_reg,
2269 [BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg,
2270 [BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg,
2271 [BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg,
2272 [BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg,
2273 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
2274 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
2275 [BPF_JMP | BPF_CALL] = call,
2276 [BPF_JMP | BPF_EXIT] = goto_out,
2279 /* --- Assembler logic --- */
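/* Patch the placeholder branch offsets emitted during translation.
 * Every BPF jump's block must end in an NFP branch (else -ELOOP);
 * plain relative branches in the block are pointed at the NFP offset
 * recorded in the destination insn's meta.
 */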
2280 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
2282 struct nfp_insn_meta *meta, *jmp_dst;
2283 u32 idx, br_idx;
2285 list_for_each_entry(meta, &nfp_prog->insns, l) {
2286 if (meta->skip)
2287 continue;
2288 if (meta->insn.code == (BPF_JMP | BPF_CALL))
2289 continue;
2290 if (BPF_CLASS(meta->insn.code) != BPF_JMP)
2291 continue;
2293 if (list_is_last(&meta->l, &nfp_prog->insns))
2294 br_idx = nfp_prog->last_bpf_off;
2295 else
2296 br_idx = list_next_entry(meta, l)->off - 1;
2298 if (!nfp_is_br(nfp_prog->prog[br_idx])) {
2299 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
2300 br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
2301 return -ELOOP;
2303 /* Leave special branches for later */
2304 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
2305 RELO_BR_REL)
2306 continue;
2308 if (!meta->jmp_dst) {
2309 pr_err("Non-exit jump doesn't have destination info recorded!!\n");
2310 return -ELOOP;
2313 jmp_dst = meta->jmp_dst;
2315 if (jmp_dst->skip) {
2316 pr_err("Branch landing on removed instruction!!\n");
2317 return -ELOOP;
2320 for (idx = meta->off; idx <= br_idx; idx++) {
2321 if (!nfp_is_br(nfp_prog->prog[idx]))
2322 continue;
2323 br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
2327 return 0;
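/* Program prologue: AND the packet-vector length field with
 * GENMASK(13, 0) so plen_reg holds just the 14 valid length bits.
 */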
2330 static void nfp_intro(struct nfp_prog *nfp_prog)
2332 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
2333 emit_alu(nfp_prog, plen_reg(nfp_prog),
2334 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
2337 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
2339 /* TC direct-action mode:
2340 * 0,1 ok NOT SUPPORTED[1]
2341 * 2 drop 0x22 -> drop, count as stat1
2342 * 4,5 nuke 0x02 -> drop
2343 * 7 redir 0x44 -> redir, count as stat2
2344 * * unspec 0x11 -> pass, count as stat0
2346 * [1] We can't support OK and RECLASSIFY because we can't tell TC
2347 * the exact decision made. We are forced to support UNSPEC
2348 * to handle aborts so that's the only one we handle for passing
2349 * packets up the stack.
2351 /* Target for aborts */
2352 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2354 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2356 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2357 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
2359 /* Target for normal exits */
2360 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2362 /* if R0 > 7 jump to abort */
2363 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
2364 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2365 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2367 wrp_immed(nfp_prog, reg_b(2), 0x41221211);
2368 wrp_immed(nfp_prog, reg_b(3), 0x41001211);
2370 emit_shf(nfp_prog, reg_a(1),
2371 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
2373 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2374 emit_shf(nfp_prog, reg_a(2),
2375 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2377 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2378 emit_shf(nfp_prog, reg_b(2),
2379 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
2381 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2383 emit_shf(nfp_prog, reg_b(2),
2384 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
2385 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2388 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
2390 /* XDP return codes:
2391 * 0 aborted 0x82 -> drop, count as stat3
2392 * 1 drop 0x22 -> drop, count as stat1
2393 * 2 pass 0x11 -> pass, count as stat0
2394 * 3 tx 0x44 -> redir, count as stat2
2395 * * unknown 0x82 -> drop, count as stat3
2397 /* Target for aborts */
2398 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
2400 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2402 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2403 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
2405 /* Target for normal exits */
2406 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
2408 /* if R0 > 3 jump to abort */
2409 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
2410 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
2412 wrp_immed(nfp_prog, reg_b(2), 0x44112282);
2414 emit_shf(nfp_prog, reg_a(1),
2415 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
2417 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
2418 emit_shf(nfp_prog, reg_b(2),
2419 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
2421 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
2423 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
2424 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
2427 static void nfp_outro(struct nfp_prog *nfp_prog)
2429 switch (nfp_prog->type) {
2430 case BPF_PROG_TYPE_SCHED_CLS:
2431 nfp_outro_tc_da(nfp_prog);
2432 break;
2433 case BPF_PROG_TYPE_XDP:
2434 nfp_outro_xdp(nfp_prog);
2435 break;
2436 default:
2437 WARN_ON(1);
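/* Main translation loop.  Record each insn's NFP offset in meta->off
 * (needed by the branch fixup pass), honour meta->skip set by the
 * optimizer, and let the previous insn's double_cb override the table
 * callback (e.g. for the two-slot BPF_LD | BPF_IMM | BPF_DW load).
 */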
2441 static int nfp_translate(struct nfp_prog *nfp_prog)
2443 struct nfp_insn_meta *meta;
2444 int err;
2446 nfp_intro(nfp_prog);
2447 if (nfp_prog->error)
2448 return nfp_prog->error;
2450 list_for_each_entry(meta, &nfp_prog->insns, l) {
2451 instr_cb_t cb = instr_cb[meta->insn.code];
2453 meta->off = nfp_prog_current_offset(nfp_prog);
2455 if (meta->skip) {
2456 nfp_prog->n_translated++;
2457 continue;
2460 if (nfp_meta_has_prev(nfp_prog, meta) &&
2461 nfp_meta_prev(meta)->double_cb)
2462 cb = nfp_meta_prev(meta)->double_cb;
2463 if (!cb)
2464 return -ENOENT;
2465 err = cb(nfp_prog, meta);
2466 if (err)
2467 return err;
2468 if (nfp_prog->error)
2469 return nfp_prog->error;
2471 nfp_prog->n_translated++;
2474 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
2476 nfp_outro(nfp_prog);
2477 if (nfp_prog->error)
2478 return nfp_prog->error;
2480 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
2481 if (nfp_prog->error)
2482 return nfp_prog->error;
2484 return nfp_fixup_branches(nfp_prog);
2487 /* --- Optimizations --- */
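/* Walk the preamble cBPF-converted programs start with: tolerate the
 * initial self-XOR register zeroing, drop the R6 = R1 move since the
 * skb pointer is ignored, and stop at the first insn matching
 * neither.
 */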
2488 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
2490 struct nfp_insn_meta *meta;
2492 list_for_each_entry(meta, &nfp_prog->insns, l) {
2493 struct bpf_insn insn = meta->insn;
2495 /* Programs converted from cBPF start with register xoring */
2496 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
2497 insn.src_reg == insn.dst_reg)
2498 continue;
2500 /* Programs start with R6 = R1 but we ignore the skb pointer */
2501 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
2502 insn.src_reg == 1 && insn.dst_reg == 6)
2503 meta->skip = true;
2505 /* Return as soon as something doesn't match */
2506 if (!meta->skip)
2507 return;
2511 /* Remove masking after load since our load guarantees this is not needed */
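/* A matched pair looks roughly like:
 *
 *   r0 = *(u8 *)skb[off]    BPF_LD | BPF_ABS | BPF_B
 *   r0 &= 0xff              BPF_ALU64 | BPF_AND | BPF_K, redundant
 *
 * The AND is dropped unless something can jump to it.
 */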
2512 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
2514 struct nfp_insn_meta *meta1, *meta2;
2515 const s32 exp_mask[] = {
2516 [BPF_B] = 0x000000ffU,
2517 [BPF_H] = 0x0000ffffU,
2518 [BPF_W] = 0xffffffffU,
2521 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2522 struct bpf_insn insn, next;
2524 insn = meta1->insn;
2525 next = meta2->insn;
2527 if (BPF_CLASS(insn.code) != BPF_LD)
2528 continue;
2529 if (BPF_MODE(insn.code) != BPF_ABS &&
2530 BPF_MODE(insn.code) != BPF_IND)
2531 continue;
2533 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
2534 continue;
2536 if (!exp_mask[BPF_SIZE(insn.code)])
2537 continue;
2538 if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
2539 continue;
2541 if (next.src_reg || next.dst_reg)
2542 continue;
2544 if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
2545 continue;
2547 meta2->skip = true;
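/* Our BPF_W loads leave the upper half clear, so the 32-bit shift
 * pair compilers emit to zero extend the result is unnecessary, e.g.:
 *
 *   r0 = *(u32 *)skb[off]
 *   r0 <<= 32
 *   r0 >>= 32     (both shifts dropped)
 */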
2551 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
2553 struct nfp_insn_meta *meta1, *meta2, *meta3;
2555 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
2556 struct bpf_insn insn, next1, next2;
2558 insn = meta1->insn;
2559 next1 = meta2->insn;
2560 next2 = meta3->insn;
2562 if (BPF_CLASS(insn.code) != BPF_LD)
2563 continue;
2564 if (BPF_MODE(insn.code) != BPF_ABS &&
2565 BPF_MODE(insn.code) != BPF_IND)
2566 continue;
2567 if (BPF_SIZE(insn.code) != BPF_W)
2568 continue;
2570 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
2571 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
2572 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
2573 next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
2574 continue;
2576 if (next1.src_reg || next1.dst_reg ||
2577 next2.src_reg || next2.dst_reg)
2578 continue;
2580 if (next1.imm != 0x20 || next2.imm != 0x20)
2581 continue;
2583 if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
2584 meta3->flags & FLAG_INSN_IS_JUMP_DST)
2585 continue;
2587 meta2->skip = true;
2588 meta3->skip = true;
2592 /* A load/store pair that forms a memory copy should look like the following:
2594 * ld_width R, [addr_src + offset_src]
2595 * st_width [addr_dest + offset_dest], R
2597 * The destination register of the load and the source register of the
2598 * store must be the same, and both must operate at the same width.
2599 * If either addr_src or addr_dest is a stack pointer, we don't do the
2600 * CPP optimization as the stack is modelled by registers on the NFP.
2602 static bool
2603 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
2604 struct nfp_insn_meta *st_meta)
2606 struct bpf_insn *ld = &ld_meta->insn;
2607 struct bpf_insn *st = &st_meta->insn;
2609 if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
2610 return false;
2612 if (ld_meta->ptr.type != PTR_TO_PACKET)
2613 return false;
2615 if (st_meta->ptr.type != PTR_TO_PACKET)
2616 return false;
2618 if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
2619 return false;
2621 if (ld->dst_reg != st->src_reg)
2622 return false;
2624 /* There is a jump to the store insn in this pair. */
2625 if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
2626 return false;
2628 return true;
2631 /* Currently, we only support chaining load/store pairs if:
2633 * - Their address base registers are the same.
2634 * - Their address offsets are in the same order.
2635 * - They operate at the same memory width.
2636 * - There is no jump into the middle of them.
2638 static bool
2639 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
2640 struct nfp_insn_meta *st_meta,
2641 struct bpf_insn *prev_ld,
2642 struct bpf_insn *prev_st)
2644 u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
2645 struct bpf_insn *ld = &ld_meta->insn;
2646 struct bpf_insn *st = &st_meta->insn;
2647 s16 prev_ld_off, prev_st_off;
2649 /* This pair is the start pair. */
2650 if (!prev_ld)
2651 return true;
2653 prev_size = BPF_LDST_BYTES(prev_ld);
2654 curr_size = BPF_LDST_BYTES(ld);
2655 prev_ld_base = prev_ld->src_reg;
2656 prev_st_base = prev_st->dst_reg;
2657 prev_ld_dst = prev_ld->dst_reg;
2658 prev_ld_off = prev_ld->off;
2659 prev_st_off = prev_st->off;
2661 if (ld->dst_reg != prev_ld_dst)
2662 return false;
2664 if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
2665 return false;
2667 if (curr_size != prev_size)
2668 return false;
2670 /* There is a jump to the head of this pair. */
2671 if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
2672 return false;
2674 /* Both in ascending order. */
2675 if (prev_ld_off + prev_size == ld->off &&
2676 prev_st_off + prev_size == st->off)
2677 return true;
2679 /* Both in descending order. */
2680 if (ld->off + curr_size == prev_ld_off &&
2681 st->off + curr_size == prev_st_off)
2682 return true;
2684 return false;
2687 /* Return TRUE if a cross memory access happens, i.e. the store area
2688 * overlaps the load area such that a later load might read back a value
2689 * written by a previous store; in that case we can't treat the sequence
2690 * as a memory copy.
2692 static bool
2693 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
2694 struct nfp_insn_meta *head_st_meta)
2696 s16 head_ld_off, head_st_off, ld_off;
2698 /* Different pointer types do not overlap. */
2699 if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
2700 return false;
2702 /* Load and store are both PTR_TO_PACKET; compare their ID info. */
2703 if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
2704 return true;
2706 /* Canonicalize the offsets: express each of them relative to the
2707 * original base register.
2709 head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
2710 head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
2711 ld_off = ld->off + head_ld_meta->ptr.off;
2713 /* Ascending order cross. */
2714 if (ld_off > head_ld_off &&
2715 head_ld_off < head_st_off && ld_off >= head_st_off)
2716 return true;
2718 /* Descending order cross. */
2719 if (ld_off < head_ld_off &&
2720 head_ld_off > head_st_off && ld_off <= head_st_off)
2721 return true;
2723 return false;
2726 /* This pass tries to identify the following instruction sequences.
2728 * load R, [regA + offA]
2729 * store [regB + offB], R
2730 * load R, [regA + offA + const_imm_A]
2731 * store [regB + offB + const_imm_A], R
2732 * load R, [regA + offA + 2 * const_imm_A]
2733 * store [regB + offB + 2 * const_imm_A], R
2734 * ...
2736 * The above sequence is typically generated by the compiler when lowering
2737 * memcpy; the NFP prefers using CPP instructions to accelerate it.
2739 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
2741 struct nfp_insn_meta *head_ld_meta = NULL;
2742 struct nfp_insn_meta *head_st_meta = NULL;
2743 struct nfp_insn_meta *meta1, *meta2;
2744 struct bpf_insn *prev_ld = NULL;
2745 struct bpf_insn *prev_st = NULL;
2746 u8 count = 0;
2748 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
2749 struct bpf_insn *ld = &meta1->insn;
2750 struct bpf_insn *st = &meta2->insn;
2752 /* Reset record status if any of the following is true:
2753 * - The current insn pair is not load/store.
2754 * - The load/store pair doesn't chain with the previous one.
2755 * - The chained load/store pair crosses the previous pair.
2756 * - The chained load/store pairs copy more than 128 bytes
2757 * in total, which is the maximum length a single NFP CPP
2758 * command can transfer.
2760 if (!curr_pair_is_memcpy(meta1, meta2) ||
2761 !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
2762 prev_st) ||
2763 (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
2764 head_st_meta) ||
2765 head_ld_meta->ldst_gather_len >= 128))) {
2766 if (!count)
2767 continue;
2769 if (count > 1) {
2770 s16 prev_ld_off = prev_ld->off;
2771 s16 prev_st_off = prev_st->off;
2772 s16 head_ld_off = head_ld_meta->insn.off;
2774 if (prev_ld_off < head_ld_off) {
2775 head_ld_meta->insn.off = prev_ld_off;
2776 head_st_meta->insn.off = prev_st_off;
2777 head_ld_meta->ldst_gather_len =
2778 -head_ld_meta->ldst_gather_len;
2781 head_ld_meta->paired_st = &head_st_meta->insn;
2782 head_st_meta->skip = true;
2783 } else {
2784 head_ld_meta->ldst_gather_len = 0;
2787 /* If the chain is ended by a load/store pair then this
2788 * could serve as the new head of the next chain.
2790 if (curr_pair_is_memcpy(meta1, meta2)) {
2791 head_ld_meta = meta1;
2792 head_st_meta = meta2;
2793 head_ld_meta->ldst_gather_len =
2794 BPF_LDST_BYTES(ld);
2795 meta1 = nfp_meta_next(meta1);
2796 meta2 = nfp_meta_next(meta2);
2797 prev_ld = ld;
2798 prev_st = st;
2799 count = 1;
2800 } else {
2801 head_ld_meta = NULL;
2802 head_st_meta = NULL;
2803 prev_ld = NULL;
2804 prev_st = NULL;
2805 count = 0;
2808 continue;
2811 if (!head_ld_meta) {
2812 head_ld_meta = meta1;
2813 head_st_meta = meta2;
2814 } else {
2815 meta1->skip = true;
2816 meta2->skip = true;
2819 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
2820 meta1 = nfp_meta_next(meta1);
2821 meta2 = nfp_meta_next(meta2);
2822 prev_ld = ld;
2823 prev_st = st;
2824 count++;
2828 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
2830 nfp_bpf_opt_reg_init(nfp_prog);
2832 nfp_bpf_opt_ld_mask(nfp_prog);
2833 nfp_bpf_opt_ld_shift(nfp_prog);
2834 nfp_bpf_opt_ldst_gather(nfp_prog);
2836 return 0;
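/* Convert the program in place to ustore format: validate each word,
 * compute its ECC bits and store the result little-endian.
 */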
2839 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
2841 __le64 *ustore = (__force __le64 *)prog;
2842 int i;
2844 for (i = 0; i < len; i++) {
2845 int err;
2847 err = nfp_ustore_check_valid_no_ecc(prog[i]);
2848 if (err)
2849 return err;
2851 ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
2854 return 0;
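/* Shrink the instruction buffer to the translated length.  Best
 * effort: on allocation failure the oversized buffer is simply kept.
 */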
2857 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
2859 void *prog;
2861 prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
2862 if (!prog)
2863 return;
2865 nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
2866 memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
2867 kvfree(nfp_prog->prog);
2868 nfp_prog->prog = prog;
2871 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
2873 int ret;
2875 ret = nfp_bpf_optimize(nfp_prog);
2876 if (ret)
2877 return ret;
2879 ret = nfp_translate(nfp_prog);
2880 if (ret) {
2881 pr_err("Translation failed with error %d (translated: %u)\n",
2882 ret, nfp_prog->n_translated);
2883 return -EINVAL;
2886 nfp_bpf_prog_trim(nfp_prog);
2888 return ret;
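/* Pre-translation pass: resolve each jump's eBPF destination index to
 * its insn meta and flag the destination as a jump target so the
 * optimizer won't delete or merge across it.
 */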
2891 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
2893 struct nfp_insn_meta *meta;
2895 /* Another pass to record jump information. */
2896 list_for_each_entry(meta, &nfp_prog->insns, l) {
2897 u64 code = meta->insn.code;
2899 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
2900 BPF_OP(code) != BPF_CALL) {
2901 struct nfp_insn_meta *dst_meta;
2902 unsigned short dst_indx;
2904 dst_indx = meta->n + 1 + meta->insn.off;
2905 dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
2906 cnt);
2908 meta->jmp_dst = dst_meta;
2909 dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
2914 bool nfp_bpf_supported_opcode(u8 code)
2916 return !!instr_cb[code];
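/* Make a per-vNIC copy of the program with all relocations resolved
 * against the vNIC's start/done offsets, then ECC encode it for the
 * ustore.
 */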
2919 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
2921 unsigned int i;
2922 u64 *prog;
2923 int err;
2925 prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
2926 GFP_KERNEL);
2927 if (!prog)
2928 return ERR_PTR(-ENOMEM);
2930 for (i = 0; i < nfp_prog->prog_len; i++) {
2931 enum nfp_relo_type special;
2932 u32 val;
2934 special = FIELD_GET(OP_RELO_TYPE, prog[i]);
2935 switch (special) {
2936 case RELO_NONE:
2937 continue;
2938 case RELO_BR_REL:
2939 br_add_offset(&prog[i], bv->start_off);
2940 break;
2941 case RELO_BR_GO_OUT:
2942 br_set_offset(&prog[i],
2943 nfp_prog->tgt_out + bv->start_off);
2944 break;
2945 case RELO_BR_GO_ABORT:
2946 br_set_offset(&prog[i],
2947 nfp_prog->tgt_abort + bv->start_off);
2948 break;
2949 case RELO_BR_NEXT_PKT:
2950 br_set_offset(&prog[i], bv->tgt_done);
2951 break;
2952 case RELO_BR_HELPER:
2953 val = br_get_offset(prog[i]);
2954 val -= BR_OFF_RELO;
2955 switch (val) {
2956 case BPF_FUNC_map_lookup_elem:
2957 val = nfp_prog->bpf->helpers.map_lookup;
2958 break;
2959 default:
2960 pr_err("relocation of unknown helper %d\n",
2961 val);
2962 err = -EINVAL;
2963 goto err_free_prog;
2965 br_set_offset(&prog[i], val);
2966 break;
2967 case RELO_IMMED_REL:
2968 immed_add_value(&prog[i], bv->start_off);
2969 break;
2972 prog[i] &= ~OP_RELO_TYPE;
2975 err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
2976 if (err)
2977 goto err_free_prog;
2979 return prog;
2981 err_free_prog:
2982 kfree(prog);
2983 return ERR_PTR(err);