arch/riscv/net/bpf_jit_comp64.c [drm/drm-misc.git, blob 99f34409fb60f48a92366c4824e6237c96f92680]
1 // SPDX-License-Identifier: GPL-2.0
2 /* BPF JIT compiler for RV64G
3  *
4  * Copyright(c) 2019 Björn Töpel <bjorn.topel@gmail.com>
5  *
6  */
8 #include <linux/bitfield.h>
9 #include <linux/bpf.h>
10 #include <linux/filter.h>
11 #include <linux/memory.h>
12 #include <linux/stop_machine.h>
13 #include <asm/patch.h>
14 #include <asm/cfi.h>
15 #include <asm/percpu.h>
16 #include "bpf_jit.h"
18 #define RV_MAX_REG_ARGS 8
19 #define RV_FENTRY_NINSNS 2
20 #define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4)
21 /* imm that allows emit_imm to emit max count insns */
22 #define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF
24 #define RV_REG_TCC RV_REG_A6
25 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if the program makes calls */
26 #define RV_REG_ARENA RV_REG_S7 /* For storing arena_vm_start */
28 static const int regmap[] = {
29 [BPF_REG_0] = RV_REG_A5,
30 [BPF_REG_1] = RV_REG_A0,
31 [BPF_REG_2] = RV_REG_A1,
32 [BPF_REG_3] = RV_REG_A2,
33 [BPF_REG_4] = RV_REG_A3,
34 [BPF_REG_5] = RV_REG_A4,
35 [BPF_REG_6] = RV_REG_S1,
36 [BPF_REG_7] = RV_REG_S2,
37 [BPF_REG_8] = RV_REG_S3,
38 [BPF_REG_9] = RV_REG_S4,
39 [BPF_REG_FP] = RV_REG_S5,
40 [BPF_REG_AX] = RV_REG_T0,
43 static const int pt_regmap[] = {
44 [RV_REG_A0] = offsetof(struct pt_regs, a0),
45 [RV_REG_A1] = offsetof(struct pt_regs, a1),
46 [RV_REG_A2] = offsetof(struct pt_regs, a2),
47 [RV_REG_A3] = offsetof(struct pt_regs, a3),
48 [RV_REG_A4] = offsetof(struct pt_regs, a4),
49 [RV_REG_A5] = offsetof(struct pt_regs, a5),
50 [RV_REG_S1] = offsetof(struct pt_regs, s1),
51 [RV_REG_S2] = offsetof(struct pt_regs, s2),
52 [RV_REG_S3] = offsetof(struct pt_regs, s3),
53 [RV_REG_S4] = offsetof(struct pt_regs, s4),
54 [RV_REG_S5] = offsetof(struct pt_regs, s5),
55 [RV_REG_T0] = offsetof(struct pt_regs, t0),
58 enum {
59 RV_CTX_F_SEEN_TAIL_CALL = 0,
60 RV_CTX_F_SEEN_CALL = RV_REG_RA,
61 RV_CTX_F_SEEN_S1 = RV_REG_S1,
62 RV_CTX_F_SEEN_S2 = RV_REG_S2,
63 RV_CTX_F_SEEN_S3 = RV_REG_S3,
64 RV_CTX_F_SEEN_S4 = RV_REG_S4,
65 RV_CTX_F_SEEN_S5 = RV_REG_S5,
66 RV_CTX_F_SEEN_S6 = RV_REG_S6,
69 static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
71 u8 reg = regmap[bpf_reg];
73 switch (reg) {
74 case RV_CTX_F_SEEN_S1:
75 case RV_CTX_F_SEEN_S2:
76 case RV_CTX_F_SEEN_S3:
77 case RV_CTX_F_SEEN_S4:
78 case RV_CTX_F_SEEN_S5:
79 case RV_CTX_F_SEEN_S6:
80 __set_bit(reg, &ctx->flags);
82 return reg;
85 static bool seen_reg(int reg, struct rv_jit_context *ctx)
87 switch (reg) {
88 case RV_CTX_F_SEEN_CALL:
89 case RV_CTX_F_SEEN_S1:
90 case RV_CTX_F_SEEN_S2:
91 case RV_CTX_F_SEEN_S3:
92 case RV_CTX_F_SEEN_S4:
93 case RV_CTX_F_SEEN_S5:
94 case RV_CTX_F_SEEN_S6:
95 return test_bit(reg, &ctx->flags);
97 return false;
100 static void mark_fp(struct rv_jit_context *ctx)
102 __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
105 static void mark_call(struct rv_jit_context *ctx)
107 __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
110 static bool seen_call(struct rv_jit_context *ctx)
112 return test_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
115 static void mark_tail_call(struct rv_jit_context *ctx)
117 __set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
120 static bool seen_tail_call(struct rv_jit_context *ctx)
122 return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
125 static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
127 mark_tail_call(ctx);
129 if (seen_call(ctx)) {
130 __set_bit(RV_CTX_F_SEEN_S6, &ctx->flags);
131 return RV_REG_S6;
133 return RV_REG_A6;
136 static bool is_32b_int(s64 val)
138 return -(1L << 31) <= val && val < (1L << 31);
141 static bool in_auipc_jalr_range(s64 val)
144 * auipc+jalr can reach any signed PC-relative offset in the range
145 * [-2^31 - 2^11, 2^31 - 2^11).
147 return (-(1L << 31) - (1L << 11)) <= val &&
148 val < ((1L << 31) - (1L << 11));
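/*
 * Worked example of the bounds above (a note, not generated output):
 * emit_addr()/emit_jump_and_link() split an offset into
 * upper = (off + (1 << 11)) >> 12 plus a sign-extended 12-bit lower part.
 * auipc takes a signed 20-bit upper immediate, so the largest reachable
 * offset is 0x7ffff7ff = 2^31 - 2^11 - 1 (upper = 0x7ffff, lower = 0x7ff),
 * and the smallest is -(2^31 + 2^11) (upper = -0x80000, lower = -0x800),
 * which matches the asymmetric range checked here.
 */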
151 /* Modify rd pointer to alternate reg to avoid corrupting original reg */
152 static void emit_sextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx)
154 emit_sextw(ra, *rd, ctx);
155 *rd = ra;
158 static void emit_zextw_alt(u8 *rd, u8 ra, struct rv_jit_context *ctx)
160 emit_zextw(ra, *rd, ctx);
161 *rd = ra;
164 /* Emit fixed-length instructions for address */
165 static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
168 * Use the ro_insns(RX) to calculate the offset as the BPF program will
169 * finally run from this memory region.
171 u64 ip = (u64)(ctx->ro_insns + ctx->ninsns);
172 s64 off = addr - ip;
173 s64 upper = (off + (1 << 11)) >> 12;
174 s64 lower = off & 0xfff;
176 if (extra_pass && !in_auipc_jalr_range(off)) {
177 pr_err("bpf-jit: target offset 0x%llx is out of range\n", off);
178 return -ERANGE;
181 emit(rv_auipc(rd, upper), ctx);
182 emit(rv_addi(rd, rd, lower), ctx);
183 return 0;
186 /* Emit variable-length instructions for 32-bit and 64-bit imm */
187 static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
189 /* Note that the immediate from the add is sign-extended,
190 * which means that we need to compensate this by adding 2^12,
191 * when the 12th bit is set. A simpler way of doing this, and
192 * getting rid of the check, is to just add 2**11 before the
193 * shift. The "Loading a 32-Bit constant" example from the
194 * "Computer Organization and Design, RISC-V edition" book by
195 * Patterson/Hennessy highlights this fact.
197 * This also means that we need to process LSB to MSB.
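/*
 * For example, val = 0xfff: upper = (0xfff + 0x800) >> 12 = 1 and
 * lower = sign_extend12(0xfff) = -1, so "lui rd, 1; addiw rd, rd, -1"
 * yields 0x1000 - 1 = 0xfff. Without the +2^11 compensation, upper would
 * be 0 and the pair would produce -1 instead of 0xfff, because addiw
 * sign-extends its immediate.
 */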
199 s64 upper = (val + (1 << 11)) >> 12;
200 /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
201 * and addi are signed and RVC checks will perform signed comparisons.
203 s64 lower = ((val & 0xfff) << 52) >> 52;
204 int shift;
206 if (is_32b_int(val)) {
207 if (upper)
208 emit_lui(rd, upper, ctx);
210 if (!upper) {
211 emit_li(rd, lower, ctx);
212 return;
215 emit_addiw(rd, rd, lower, ctx);
216 return;
219 shift = __ffs(upper);
220 upper >>= shift;
221 shift += 12;
223 emit_imm(rd, upper, ctx);
225 emit_slli(rd, rd, shift, ctx);
226 if (lower)
227 emit_addi(rd, rd, lower, ctx);
230 static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
232 int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
234 if (seen_reg(RV_REG_RA, ctx)) {
235 emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
236 store_offset -= 8;
238 emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
239 store_offset -= 8;
240 if (seen_reg(RV_REG_S1, ctx)) {
241 emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
242 store_offset -= 8;
244 if (seen_reg(RV_REG_S2, ctx)) {
245 emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
246 store_offset -= 8;
248 if (seen_reg(RV_REG_S3, ctx)) {
249 emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
250 store_offset -= 8;
252 if (seen_reg(RV_REG_S4, ctx)) {
253 emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
254 store_offset -= 8;
256 if (seen_reg(RV_REG_S5, ctx)) {
257 emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
258 store_offset -= 8;
260 if (seen_reg(RV_REG_S6, ctx)) {
261 emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
262 store_offset -= 8;
264 if (ctx->arena_vm_start) {
265 emit_ld(RV_REG_ARENA, store_offset, RV_REG_SP, ctx);
266 store_offset -= 8;
269 emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
270 /* Set return value. */
271 if (!is_tail_call)
272 emit_addiw(RV_REG_A0, RV_REG_A5, 0, ctx);
273 emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
274 is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */
275 ctx);
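/*
 * The tail-call jump above lands at T3 + (RV_FENTRY_NINSNS + 1) * 4, i.e.
 * 12 bytes past the callee's start: it skips the two reserved fentry nops
 * plus the uncompressed "addi a6, zero, MAX_TAIL_CALL_CNT" emitted by
 * bpf_jit_build_prologue(), so the caller's remaining tail-call count in
 * a6 is preserved across the jump.
 */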
278 static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
279 struct rv_jit_context *ctx)
281 switch (cond) {
282 case BPF_JEQ:
283 emit(rv_beq(rd, rs, rvoff >> 1), ctx);
284 return;
285 case BPF_JGT:
286 emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
287 return;
288 case BPF_JLT:
289 emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
290 return;
291 case BPF_JGE:
292 emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
293 return;
294 case BPF_JLE:
295 emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
296 return;
297 case BPF_JNE:
298 emit(rv_bne(rd, rs, rvoff >> 1), ctx);
299 return;
300 case BPF_JSGT:
301 emit(rv_blt(rs, rd, rvoff >> 1), ctx);
302 return;
303 case BPF_JSLT:
304 emit(rv_blt(rd, rs, rvoff >> 1), ctx);
305 return;
306 case BPF_JSGE:
307 emit(rv_bge(rd, rs, rvoff >> 1), ctx);
308 return;
309 case BPF_JSLE:
310 emit(rv_bge(rs, rd, rvoff >> 1), ctx);
314 static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
315 struct rv_jit_context *ctx)
317 s64 upper, lower;
319 if (is_13b_int(rvoff)) {
320 emit_bcc(cond, rd, rs, rvoff, ctx);
321 return;
324 /* Adjust for jal */
325 rvoff -= 4;
327 /* Transform, e.g.:
328 * bne rd,rs,foo
329 * to
330 * beq rd,rs,<.L1>
331 * (auipc foo)
332 * jal(r) foo
333 * .L1
335 cond = invert_bpf_cond(cond);
336 if (is_21b_int(rvoff)) {
337 emit_bcc(cond, rd, rs, 8, ctx);
338 emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
339 return;
342 /* 32b No need for an additional rvoff adjustment, since we
343 * get that from the auipc at PC', where PC = PC' + 4.
345 upper = (rvoff + (1 << 11)) >> 12;
346 lower = rvoff & 0xfff;
348 emit_bcc(cond, rd, rs, 12, ctx);
349 emit(rv_auipc(RV_REG_T1, upper), ctx);
350 emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
353 static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
355 int tc_ninsn, off, start_insn = ctx->ninsns;
356 u8 tcc = rv_tail_call_reg(ctx);
358 /* a0: &ctx
359 * a1: &array
360 * a2: index
362 * if (index >= array->map.max_entries)
363 * goto out;
365 tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
366 ctx->offset[0];
367 emit_zextw(RV_REG_A2, RV_REG_A2, ctx);
369 off = offsetof(struct bpf_array, map.max_entries);
370 if (is_12b_check(off, insn))
371 return -1;
372 emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
373 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
374 emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
376 /* if (--TCC < 0)
377 * goto out;
379 emit_addi(RV_REG_TCC, tcc, -1, ctx);
380 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
381 emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
383 /* prog = array->ptrs[index];
384 * if (!prog)
385 * goto out;
387 emit_sh3add(RV_REG_T2, RV_REG_A2, RV_REG_A1, ctx);
388 off = offsetof(struct bpf_array, ptrs);
389 if (is_12b_check(off, insn))
390 return -1;
391 emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
392 off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
393 emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
395 /* goto *(prog->bpf_func + 4); */
396 off = offsetof(struct bpf_prog, bpf_func);
397 if (is_12b_check(off, insn))
398 return -1;
399 emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
400 __build_epilogue(true, ctx);
401 return 0;
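/*
 * Rough shape of the sequence emitted above (a hand-written approximation,
 * not the exact encoder output; "tcc" is a6 or s6 per rv_tail_call_reg(),
 * and the *_off names stand for the struct offsets computed above):
 *
 *	zext.w	a2, a2
 *	lwu	t1, max_entries_off(a1)
 *	bgeu	a2, t1, out
 *	addi	a6, tcc, -1
 *	blt	a6, zero, out
 *	sh3add	t2, a2, a1
 *	ld	t2, ptrs_off(t2)
 *	beq	t2, zero, out
 *	ld	t3, bpf_func_off(t2)
 *	<restore callee-saved regs, deallocate stack>
 *	jalr	zero, t3, 12		// enter prog past nops and TCC init
 * out:
 */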
404 static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
405 struct rv_jit_context *ctx)
407 u8 code = insn->code;
409 switch (code) {
410 case BPF_JMP | BPF_JA:
411 case BPF_JMP | BPF_CALL:
412 case BPF_JMP | BPF_EXIT:
413 case BPF_JMP | BPF_TAIL_CALL:
414 break;
415 default:
416 *rd = bpf_to_rv_reg(insn->dst_reg, ctx);
419 if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) ||
420 code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
421 code & BPF_LDX || code & BPF_STX)
422 *rs = bpf_to_rv_reg(insn->src_reg, ctx);
425 static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr,
426 struct rv_jit_context *ctx)
428 s64 upper, lower;
430 if (rvoff && fixed_addr && is_21b_int(rvoff)) {
431 emit(rv_jal(rd, rvoff >> 1), ctx);
432 return 0;
433 } else if (in_auipc_jalr_range(rvoff)) {
434 upper = (rvoff + (1 << 11)) >> 12;
435 lower = rvoff & 0xfff;
436 emit(rv_auipc(RV_REG_T1, upper), ctx);
437 emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
438 return 0;
441 pr_err("bpf-jit: target offset 0x%llx is out of range\n", rvoff);
442 return -ERANGE;
445 static bool is_signed_bpf_cond(u8 cond)
447 return cond == BPF_JSGT || cond == BPF_JSLT ||
448 cond == BPF_JSGE || cond == BPF_JSLE;
451 static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
453 s64 off = 0;
454 u64 ip;
456 if (addr && ctx->insns && ctx->ro_insns) {
458 * Use the ro_insns(RX) to calculate the offset as the BPF
459 * program will finally run from this memory region.
461 ip = (u64)(long)(ctx->ro_insns + ctx->ninsns);
462 off = addr - ip;
465 return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx);
468 static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx)
470 if (IS_ENABLED(CONFIG_CFI_CLANG))
471 emit(hash, ctx);
474 static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
475 struct rv_jit_context *ctx)
477 u8 r0;
478 int jmp_offset;
480 if (off) {
481 if (is_12b_int(off)) {
482 emit_addi(RV_REG_T1, rd, off, ctx);
483 } else {
484 emit_imm(RV_REG_T1, off, ctx);
485 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
487 rd = RV_REG_T1;
490 switch (imm) {
491 /* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */
492 case BPF_ADD:
493 emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) :
494 rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
495 break;
496 case BPF_AND:
497 emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) :
498 rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
499 break;
500 case BPF_OR:
501 emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) :
502 rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
503 break;
504 case BPF_XOR:
505 emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) :
506 rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
507 break;
508 /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
509 case BPF_ADD | BPF_FETCH:
510 emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) :
511 rv_amoadd_w(rs, rs, rd, 1, 1), ctx);
512 if (!is64)
513 emit_zextw(rs, rs, ctx);
514 break;
515 case BPF_AND | BPF_FETCH:
516 emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) :
517 rv_amoand_w(rs, rs, rd, 1, 1), ctx);
518 if (!is64)
519 emit_zextw(rs, rs, ctx);
520 break;
521 case BPF_OR | BPF_FETCH:
522 emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) :
523 rv_amoor_w(rs, rs, rd, 1, 1), ctx);
524 if (!is64)
525 emit_zextw(rs, rs, ctx);
526 break;
527 case BPF_XOR | BPF_FETCH:
528 emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) :
529 rv_amoxor_w(rs, rs, rd, 1, 1), ctx);
530 if (!is64)
531 emit_zextw(rs, rs, ctx);
532 break;
533 /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
534 case BPF_XCHG:
535 emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) :
536 rv_amoswap_w(rs, rs, rd, 1, 1), ctx);
537 if (!is64)
538 emit_zextw(rs, rs, ctx);
539 break;
540 /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
541 case BPF_CMPXCHG:
542 r0 = bpf_to_rv_reg(BPF_REG_0, ctx);
543 if (is64)
544 emit_mv(RV_REG_T2, r0, ctx);
545 else
546 emit_addiw(RV_REG_T2, r0, 0, ctx);
547 emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
548 rv_lr_w(r0, 0, rd, 0, 0), ctx);
549 jmp_offset = ninsns_rvoff(8);
550 emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
551 emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) :
552 rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx);
553 jmp_offset = ninsns_rvoff(-6);
554 emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
555 emit(rv_fence(0x3, 0x3), ctx);
556 break;
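/*
 * BPF_CMPXCHG above expands to an LR/SC retry loop, roughly (64-bit case
 * shown; r0 is BPF R0, i.e. a5):
 *
 *	mv	t2, r0			// remember the expected value
 *	lr.d	r0, (rd)		// load old value into r0
 *	bne	t2, r0, done		// mismatch: r0 already holds old value
 *	sc.d	t3, rs, (rd)		// try to store the new value
 *	bne	t3, zero, retry		// SC failed: restart from the lr.d
 *	fence	rw, rw
 * done:
 */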
560 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
561 #define BPF_FIXUP_REG_MASK GENMASK(31, 27)
562 #define REG_DONT_CLEAR_MARKER 0 /* RV_REG_ZERO unused in pt_regmap */
564 bool ex_handler_bpf(const struct exception_table_entry *ex,
565 struct pt_regs *regs)
567 off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
568 int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
570 if (regs_offset != REG_DONT_CLEAR_MARKER)
571 *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
572 regs->epc = (unsigned long)&ex->fixup - offset;
574 return true;
577 /* For accesses to BTF pointers, add an entry to the exception table */
578 static int add_exception_handler(const struct bpf_insn *insn,
579 struct rv_jit_context *ctx,
580 int dst_reg, int insn_len)
582 struct exception_table_entry *ex;
583 unsigned long pc;
584 off_t ins_offset;
585 off_t fixup_offset;
587 if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
588 (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
589 BPF_MODE(insn->code) != BPF_PROBE_MEM32))
590 return 0;
592 if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
593 return -EINVAL;
595 if (WARN_ON_ONCE(insn_len > ctx->ninsns))
596 return -EINVAL;
598 if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1))
599 return -EINVAL;
601 ex = &ctx->prog->aux->extable[ctx->nexentries];
602 pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len];
605 * This is the relative offset of the instruction that may fault from
606 * the exception table itself. This will be written to the exception
607 * table and if this instruction faults, the destination register will
608 * be set to '0' and the execution will jump to the next instruction.
610 ins_offset = pc - (long)&ex->insn;
611 if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
612 return -ERANGE;
615 * Since the extable follows the program, the fixup offset is always
616 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
617 * to keep things simple, and put the destination register in the upper
618 * bits. We don't need to worry about buildtime or runtime sort
619 * modifying the upper bits because the table is already sorted, and
620 * isn't part of the main exception table.
622 * The fixup_offset is set to the next instruction from the instruction
623 * that may fault. The execution will jump to this after handling the
624 * fault.
626 fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
627 if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
628 return -ERANGE;
631 * The offsets above have been calculated using the RO buffer but we
632 * need to use the R/W buffer for writes.
633 * switch ex to rw buffer for writing.
635 ex = (void *)ctx->insns + ((void *)ex - (void *)ctx->ro_insns);
637 ex->insn = ins_offset;
639 ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
640 FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
641 ex->type = EX_TYPE_BPF;
643 ctx->nexentries++;
644 return 0;
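/*
 * Sanity check of the offset algebra: ex_handler_bpf() computes
 *   regs->epc = (unsigned long)&ex->fixup - offset
 * while fixup_offset above is (long)&ex->fixup - (pc + insn_len * 2),
 * so on a fault execution resumes at pc + insn_len * 2, i.e. the first
 * instruction after the one that faulted, with the destination register
 * (encoded in the upper bits of ex->fixup) cleared to zero first unless
 * it is REG_DONT_CLEAR_MARKER.
 */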
647 static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
649 s64 rvoff;
650 struct rv_jit_context ctx;
652 ctx.ninsns = 0;
653 ctx.insns = (u16 *)insns;
655 if (!target) {
656 emit(rv_nop(), &ctx);
657 emit(rv_nop(), &ctx);
658 return 0;
661 rvoff = (s64)(target - ip);
662 return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx);
665 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
666 void *old_addr, void *new_addr)
668 u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS];
669 bool is_call = poke_type == BPF_MOD_CALL;
670 int ret;
672 if (!is_kernel_text((unsigned long)ip) &&
673 !is_bpf_text_address((unsigned long)ip))
674 return -ENOTSUPP;
676 ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
677 if (ret)
678 return ret;
680 if (memcmp(ip, old_insns, RV_FENTRY_NBYTES))
681 return -EFAULT;
683 ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
684 if (ret)
685 return ret;
687 cpus_read_lock();
688 mutex_lock(&text_mutex);
689 if (memcmp(ip, new_insns, RV_FENTRY_NBYTES))
690 ret = patch_text(ip, new_insns, RV_FENTRY_NBYTES);
691 mutex_unlock(&text_mutex);
692 cpus_read_unlock();
694 return ret;
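/*
 * Net effect: the RV_FENTRY_NINSNS (two) 4-byte slots reserved at the
 * patch site toggle between two nops and an auipc/jalr pair. For
 * BPF_MOD_CALL the jalr link register is t0, so the patched-in trampoline
 * can return to the traced function; for plain jumps x0 is used.
 * gen_jump_or_nops() builds both the expected old words and the new ones,
 * and patch_text() only runs when the site actually changes.
 */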
697 static void store_args(int nr_arg_slots, int args_off, struct rv_jit_context *ctx)
699 int i;
701 for (i = 0; i < nr_arg_slots; i++) {
702 if (i < RV_MAX_REG_ARGS) {
703 emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
704 } else {
705 /* skip slots for T0 and FP of traced function */
706 emit_ld(RV_REG_T1, 16 + (i - RV_MAX_REG_ARGS) * 8, RV_REG_FP, ctx);
707 emit_sd(RV_REG_FP, -args_off, RV_REG_T1, ctx);
709 args_off -= 8;
713 static void restore_args(int nr_reg_args, int args_off, struct rv_jit_context *ctx)
715 int i;
717 for (i = 0; i < nr_reg_args; i++) {
718 emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
719 args_off -= 8;
723 static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off,
724 struct rv_jit_context *ctx)
726 int i;
728 for (i = 0; i < nr_stack_args; i++) {
729 emit_ld(RV_REG_T1, -(args_off - RV_MAX_REG_ARGS * 8), RV_REG_FP, ctx);
730 emit_sd(RV_REG_FP, -stk_arg_off, RV_REG_T1, ctx);
731 args_off -= 8;
732 stk_arg_off -= 8;
736 static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
737 int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
739 int ret, branch_off;
740 struct bpf_prog *p = l->link.prog;
741 int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
743 if (l->cookie) {
744 emit_imm(RV_REG_T1, l->cookie, ctx);
745 emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_T1, ctx);
746 } else {
747 emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx);
750 /* arg1: prog */
751 emit_imm(RV_REG_A0, (const s64)p, ctx);
752 /* arg2: &run_ctx */
753 emit_addi(RV_REG_A1, RV_REG_FP, -run_ctx_off, ctx);
754 ret = emit_call((const u64)bpf_trampoline_enter(p), true, ctx);
755 if (ret)
756 return ret;
758 /* store prog start time */
759 emit_mv(RV_REG_S1, RV_REG_A0, ctx);
761 /* if (__bpf_prog_enter(prog) == 0)
762 * goto skip_exec_of_prog;
764 branch_off = ctx->ninsns;
765 /* nop reserved for conditional jump */
766 emit(rv_nop(), ctx);
768 /* arg1: &args_off */
769 emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
770 if (!p->jited)
771 /* arg2: progs[i]->insnsi for interpreter */
772 emit_imm(RV_REG_A1, (const s64)p->insnsi, ctx);
773 ret = emit_call((const u64)p->bpf_func, true, ctx);
774 if (ret)
775 return ret;
777 if (save_ret) {
778 emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
779 emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
782 /* update branch with beqz */
783 if (ctx->insns) {
784 int offset = ninsns_rvoff(ctx->ninsns - branch_off);
785 u32 insn = rv_beq(RV_REG_A0, RV_REG_ZERO, offset >> 1);
786 *(u32 *)(ctx->insns + branch_off) = insn;
789 /* arg1: prog */
790 emit_imm(RV_REG_A0, (const s64)p, ctx);
791 /* arg2: prog start time */
792 emit_mv(RV_REG_A1, RV_REG_S1, ctx);
793 /* arg3: &run_ctx */
794 emit_addi(RV_REG_A2, RV_REG_FP, -run_ctx_off, ctx);
795 ret = emit_call((const u64)bpf_trampoline_exit(p), true, ctx);
797 return ret;
800 static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
801 const struct btf_func_model *m,
802 struct bpf_tramp_links *tlinks,
803 void *func_addr, u32 flags,
804 struct rv_jit_context *ctx)
806 int i, ret, offset;
807 int *branches_off = NULL;
808 int stack_size = 0, nr_arg_slots = 0;
809 int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, stk_arg_off;
810 struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
811 struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
812 struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
813 bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
814 void *orig_call = func_addr;
815 bool save_ret;
816 u32 insn;
818 /* Two types of generated trampoline stack layout:
820 * 1. trampoline called from function entry
821 * --------------------------------------
822 * FP + 8 [ RA to parent func ] return address to parent
823 * function
824 * FP + 0 [ FP of parent func ] frame pointer of parent
825 * function
826 * FP - 8 [ T0 to traced func ] return address of traced
827 * function
828 * FP - 16 [ FP of traced func ] frame pointer of traced
829 * function
830 * --------------------------------------
832 * 2. trampoline called directly
833 * --------------------------------------
834 * FP - 8 [ RA to caller func ] return address to caller
835 * function
836 * FP - 16 [ FP of caller func ] frame pointer of caller
837 * function
838 * --------------------------------------
840 * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
841 * BPF_TRAMP_F_RET_FENTRY_RET
842 * [ argN ]
843 * [ ... ]
844 * FP - args_off [ arg1 ]
846 * FP - nregs_off [ regs count ]
848 * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG
850 * FP - run_ctx_off [ bpf_tramp_run_ctx ]
852 * FP - sreg_off [ callee saved reg ]
854 * [ pads ] pads for 16 bytes alignment
856 * [ stack_argN ]
857 * [ ... ]
858 * FP - stk_arg_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG
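/*
 * Example sizing (illustrative, following the accounting below): a traced
 * function with two 8-byte arguments and BPF_TRAMP_F_CALL_ORIG set needs
 * 16 (RA/FP or T0/FP) + 16 (A5/A0 return values) + 16 (two arg slots) +
 * 8 (regs count) + round_up(sizeof(struct bpf_tramp_run_ctx), 8) +
 * 8 (saved S1), with no ip slot and no stack-passed args, rounded up to
 * STACK_ALIGN.
 */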
861 if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
862 return -ENOTSUPP;
864 if (m->nr_args > MAX_BPF_FUNC_ARGS)
865 return -ENOTSUPP;
867 for (i = 0; i < m->nr_args; i++)
868 nr_arg_slots += round_up(m->arg_size[i], 8) / 8;
870 /* room in the trampoline frame to store the return address and frame pointer */
871 stack_size += 16;
873 save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
874 if (save_ret) {
875 stack_size += 16; /* Save both A5 (BPF R0) and A0 */
876 retval_off = stack_size;
879 stack_size += nr_arg_slots * 8;
880 args_off = stack_size;
882 stack_size += 8;
883 nregs_off = stack_size;
885 if (flags & BPF_TRAMP_F_IP_ARG) {
886 stack_size += 8;
887 ip_off = stack_size;
890 stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
891 run_ctx_off = stack_size;
893 stack_size += 8;
894 sreg_off = stack_size;
896 if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - RV_MAX_REG_ARGS > 0))
897 stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8;
899 stack_size = round_up(stack_size, STACK_ALIGN);
901 /* room for args passed on the stack must be at the top of the stack frame */
902 stk_arg_off = stack_size;
904 if (!is_struct_ops) {
905 /* For the trampoline called from function entry,
906 * the frame of traced function and the frame of
907 * trampoline need to be considered.
909 emit_addi(RV_REG_SP, RV_REG_SP, -16, ctx);
910 emit_sd(RV_REG_SP, 8, RV_REG_RA, ctx);
911 emit_sd(RV_REG_SP, 0, RV_REG_FP, ctx);
912 emit_addi(RV_REG_FP, RV_REG_SP, 16, ctx);
914 emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
915 emit_sd(RV_REG_SP, stack_size - 8, RV_REG_T0, ctx);
916 emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
917 emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
918 } else {
919 /* emit kcfi hash */
920 emit_kcfi(cfi_get_func_hash(func_addr), ctx);
921 /* For the trampoline called directly, just handle
922 * the frame of trampoline.
924 emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
925 emit_sd(RV_REG_SP, stack_size - 8, RV_REG_RA, ctx);
926 emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx);
927 emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
930 /* callee saved register S1 to pass start time */
931 emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx);
933 /* store ip address of the traced function */
934 if (flags & BPF_TRAMP_F_IP_ARG) {
935 emit_imm(RV_REG_T1, (const s64)func_addr, ctx);
936 emit_sd(RV_REG_FP, -ip_off, RV_REG_T1, ctx);
939 emit_li(RV_REG_T1, nr_arg_slots, ctx);
940 emit_sd(RV_REG_FP, -nregs_off, RV_REG_T1, ctx);
942 store_args(nr_arg_slots, args_off, ctx);
944 /* skip to actual body of traced function */
945 if (flags & BPF_TRAMP_F_SKIP_FRAME)
946 orig_call += RV_FENTRY_NINSNS * 4;
948 if (flags & BPF_TRAMP_F_CALL_ORIG) {
949 emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx);
950 ret = emit_call((const u64)__bpf_tramp_enter, true, ctx);
951 if (ret)
952 return ret;
955 for (i = 0; i < fentry->nr_links; i++) {
956 ret = invoke_bpf_prog(fentry->links[i], args_off, retval_off, run_ctx_off,
957 flags & BPF_TRAMP_F_RET_FENTRY_RET, ctx);
958 if (ret)
959 return ret;
962 if (fmod_ret->nr_links) {
963 branches_off = kcalloc(fmod_ret->nr_links, sizeof(int), GFP_KERNEL);
964 if (!branches_off)
965 return -ENOMEM;
967 /* cleanup to avoid garbage return value confusion */
968 emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
969 for (i = 0; i < fmod_ret->nr_links; i++) {
970 ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off,
971 run_ctx_off, true, ctx);
972 if (ret)
973 goto out;
974 emit_ld(RV_REG_T1, -retval_off, RV_REG_FP, ctx);
975 branches_off[i] = ctx->ninsns;
976 /* nop reserved for conditional jump */
977 emit(rv_nop(), ctx);
981 if (flags & BPF_TRAMP_F_CALL_ORIG) {
982 restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx);
983 restore_stack_args(nr_arg_slots - RV_MAX_REG_ARGS, args_off, stk_arg_off, ctx);
984 ret = emit_call((const u64)orig_call, true, ctx);
985 if (ret)
986 goto out;
987 emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
988 emit_sd(RV_REG_FP, -(retval_off - 8), regmap[BPF_REG_0], ctx);
989 im->ip_after_call = ctx->ro_insns + ctx->ninsns;
990 /* 2 nops reserved for auipc+jalr pair */
991 emit(rv_nop(), ctx);
992 emit(rv_nop(), ctx);
995 /* update branches saved in invoke_bpf_mod_ret with bnez */
996 for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) {
997 offset = ninsns_rvoff(ctx->ninsns - branches_off[i]);
998 insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1);
999 *(u32 *)(ctx->insns + branches_off[i]) = insn;
1002 for (i = 0; i < fexit->nr_links; i++) {
1003 ret = invoke_bpf_prog(fexit->links[i], args_off, retval_off,
1004 run_ctx_off, false, ctx);
1005 if (ret)
1006 goto out;
1009 if (flags & BPF_TRAMP_F_CALL_ORIG) {
1010 im->ip_epilogue = ctx->ro_insns + ctx->ninsns;
1011 emit_imm(RV_REG_A0, ctx->insns ? (const s64)im : RV_MAX_COUNT_IMM, ctx);
1012 ret = emit_call((const u64)__bpf_tramp_exit, true, ctx);
1013 if (ret)
1014 goto out;
1017 if (flags & BPF_TRAMP_F_RESTORE_REGS)
1018 restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx);
1020 if (save_ret) {
1021 emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
1022 emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx);
1025 emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
1027 if (!is_struct_ops) {
1028 /* trampoline called from function entry */
1029 emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx);
1030 emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
1031 emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
1033 emit_ld(RV_REG_RA, 8, RV_REG_SP, ctx);
1034 emit_ld(RV_REG_FP, 0, RV_REG_SP, ctx);
1035 emit_addi(RV_REG_SP, RV_REG_SP, 16, ctx);
1037 if (flags & BPF_TRAMP_F_SKIP_FRAME)
1038 /* return to parent function */
1039 emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
1040 else
1041 /* return to traced function */
1042 emit_jalr(RV_REG_ZERO, RV_REG_T0, 0, ctx);
1043 } else {
1044 /* trampoline called directly */
1045 emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx);
1046 emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
1047 emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
1049 emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
1052 ret = ctx->ninsns;
1053 out:
1054 kfree(branches_off);
1055 return ret;
1058 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
1059 struct bpf_tramp_links *tlinks, void *func_addr)
1061 struct bpf_tramp_image im;
1062 struct rv_jit_context ctx;
1063 int ret;
1065 ctx.ninsns = 0;
1066 ctx.insns = NULL;
1067 ctx.ro_insns = NULL;
1068 ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);
1070 return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
1073 void *arch_alloc_bpf_trampoline(unsigned int size)
1075 return bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
1078 void arch_free_bpf_trampoline(void *image, unsigned int size)
1080 bpf_prog_pack_free(image, size);
1083 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
1084 void *ro_image_end, const struct btf_func_model *m,
1085 u32 flags, struct bpf_tramp_links *tlinks,
1086 void *func_addr)
1088 int ret;
1089 void *image, *res;
1090 struct rv_jit_context ctx;
1091 u32 size = ro_image_end - ro_image;
1093 image = kvmalloc(size, GFP_KERNEL);
1094 if (!image)
1095 return -ENOMEM;
1097 ctx.ninsns = 0;
1098 ctx.insns = image;
1099 ctx.ro_insns = ro_image;
1100 ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
1101 if (ret < 0)
1102 goto out;
1104 if (WARN_ON(size < ninsns_rvoff(ctx.ninsns))) {
1105 ret = -E2BIG;
1106 goto out;
1109 res = bpf_arch_text_copy(ro_image, image, size);
1110 if (IS_ERR(res)) {
1111 ret = PTR_ERR(res);
1112 goto out;
1115 bpf_flush_icache(ro_image, ro_image_end);
1116 out:
1117 kvfree(image);
1118 return ret < 0 ? ret : size;
1121 int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
1122 bool extra_pass)
1124 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
1125 BPF_CLASS(insn->code) == BPF_JMP;
1126 int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
1127 struct bpf_prog_aux *aux = ctx->prog->aux;
1128 u8 rd = -1, rs = -1, code = insn->code;
1129 s16 off = insn->off;
1130 s32 imm = insn->imm;
1132 init_regs(&rd, &rs, insn, ctx);
1134 switch (code) {
1135 /* dst = src */
1136 case BPF_ALU | BPF_MOV | BPF_X:
1137 case BPF_ALU64 | BPF_MOV | BPF_X:
1138 if (insn_is_cast_user(insn)) {
1139 emit_mv(RV_REG_T1, rs, ctx);
1140 emit_zextw(RV_REG_T1, RV_REG_T1, ctx);
1141 emit_imm(rd, (ctx->user_vm_start >> 32) << 32, ctx);
1142 emit(rv_beq(RV_REG_T1, RV_REG_ZERO, 4), ctx);
1143 emit_or(RV_REG_T1, rd, RV_REG_T1, ctx);
1144 emit_mv(rd, RV_REG_T1, ctx);
1145 break;
1146 } else if (insn_is_mov_percpu_addr(insn)) {
1147 if (rd != rs)
1148 emit_mv(rd, rs, ctx);
1149 #ifdef CONFIG_SMP
1150 /* Load current CPU number in T1 */
1151 emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu),
1152 RV_REG_TP, ctx);
1153 /* Load address of __per_cpu_offset array in T2 */
1154 emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx);
1155 /* Get address of __per_cpu_offset[cpu] in T1 */
1156 emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx);
1157 /* Load __per_cpu_offset[cpu] in T1 */
1158 emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx);
1159 /* Add the offset to Rd */
1160 emit_add(rd, rd, RV_REG_T1, ctx);
1161 #endif
1163 if (imm == 1) {
1164 /* Special mov32 for zext */
1165 emit_zextw(rd, rd, ctx);
1166 break;
1168 switch (insn->off) {
1169 case 0:
1170 emit_mv(rd, rs, ctx);
1171 break;
1172 case 8:
1173 emit_sextb(rd, rs, ctx);
1174 break;
1175 case 16:
1176 emit_sexth(rd, rs, ctx);
1177 break;
1178 case 32:
1179 emit_sextw(rd, rs, ctx);
1180 break;
1182 if (!is64 && !aux->verifier_zext)
1183 emit_zextw(rd, rd, ctx);
1184 break;
1186 /* dst = dst OP src */
1187 case BPF_ALU | BPF_ADD | BPF_X:
1188 case BPF_ALU64 | BPF_ADD | BPF_X:
1189 emit_add(rd, rd, rs, ctx);
1190 if (!is64 && !aux->verifier_zext)
1191 emit_zextw(rd, rd, ctx);
1192 break;
1193 case BPF_ALU | BPF_SUB | BPF_X:
1194 case BPF_ALU64 | BPF_SUB | BPF_X:
1195 if (is64)
1196 emit_sub(rd, rd, rs, ctx);
1197 else
1198 emit_subw(rd, rd, rs, ctx);
1200 if (!is64 && !aux->verifier_zext)
1201 emit_zextw(rd, rd, ctx);
1202 break;
1203 case BPF_ALU | BPF_AND | BPF_X:
1204 case BPF_ALU64 | BPF_AND | BPF_X:
1205 emit_and(rd, rd, rs, ctx);
1206 if (!is64 && !aux->verifier_zext)
1207 emit_zextw(rd, rd, ctx);
1208 break;
1209 case BPF_ALU | BPF_OR | BPF_X:
1210 case BPF_ALU64 | BPF_OR | BPF_X:
1211 emit_or(rd, rd, rs, ctx);
1212 if (!is64 && !aux->verifier_zext)
1213 emit_zextw(rd, rd, ctx);
1214 break;
1215 case BPF_ALU | BPF_XOR | BPF_X:
1216 case BPF_ALU64 | BPF_XOR | BPF_X:
1217 emit_xor(rd, rd, rs, ctx);
1218 if (!is64 && !aux->verifier_zext)
1219 emit_zextw(rd, rd, ctx);
1220 break;
1221 case BPF_ALU | BPF_MUL | BPF_X:
1222 case BPF_ALU64 | BPF_MUL | BPF_X:
1223 emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
1224 if (!is64 && !aux->verifier_zext)
1225 emit_zextw(rd, rd, ctx);
1226 break;
1227 case BPF_ALU | BPF_DIV | BPF_X:
1228 case BPF_ALU64 | BPF_DIV | BPF_X:
1229 if (off)
1230 emit(is64 ? rv_div(rd, rd, rs) : rv_divw(rd, rd, rs), ctx);
1231 else
1232 emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
1233 if (!is64 && !aux->verifier_zext)
1234 emit_zextw(rd, rd, ctx);
1235 break;
1236 case BPF_ALU | BPF_MOD | BPF_X:
1237 case BPF_ALU64 | BPF_MOD | BPF_X:
1238 if (off)
1239 emit(is64 ? rv_rem(rd, rd, rs) : rv_remw(rd, rd, rs), ctx);
1240 else
1241 emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
1242 if (!is64 && !aux->verifier_zext)
1243 emit_zextw(rd, rd, ctx);
1244 break;
1245 case BPF_ALU | BPF_LSH | BPF_X:
1246 case BPF_ALU64 | BPF_LSH | BPF_X:
1247 emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
1248 if (!is64 && !aux->verifier_zext)
1249 emit_zextw(rd, rd, ctx);
1250 break;
1251 case BPF_ALU | BPF_RSH | BPF_X:
1252 case BPF_ALU64 | BPF_RSH | BPF_X:
1253 emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
1254 if (!is64 && !aux->verifier_zext)
1255 emit_zextw(rd, rd, ctx);
1256 break;
1257 case BPF_ALU | BPF_ARSH | BPF_X:
1258 case BPF_ALU64 | BPF_ARSH | BPF_X:
1259 emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
1260 if (!is64 && !aux->verifier_zext)
1261 emit_zextw(rd, rd, ctx);
1262 break;
1264 /* dst = -dst */
1265 case BPF_ALU | BPF_NEG:
1266 case BPF_ALU64 | BPF_NEG:
1267 emit_sub(rd, RV_REG_ZERO, rd, ctx);
1268 if (!is64 && !aux->verifier_zext)
1269 emit_zextw(rd, rd, ctx);
1270 break;
1272 /* dst = BSWAP##imm(dst) */
1273 case BPF_ALU | BPF_END | BPF_FROM_LE:
1274 switch (imm) {
1275 case 16:
1276 emit_zexth(rd, rd, ctx);
1277 break;
1278 case 32:
1279 if (!aux->verifier_zext)
1280 emit_zextw(rd, rd, ctx);
1281 break;
1282 case 64:
1283 /* Do nothing */
1284 break;
1286 break;
1287 case BPF_ALU | BPF_END | BPF_FROM_BE:
1288 case BPF_ALU64 | BPF_END | BPF_FROM_LE:
1289 emit_bswap(rd, imm, ctx);
1290 break;
1292 /* dst = imm */
1293 case BPF_ALU | BPF_MOV | BPF_K:
1294 case BPF_ALU64 | BPF_MOV | BPF_K:
1295 emit_imm(rd, imm, ctx);
1296 if (!is64 && !aux->verifier_zext)
1297 emit_zextw(rd, rd, ctx);
1298 break;
1300 /* dst = dst OP imm */
1301 case BPF_ALU | BPF_ADD | BPF_K:
1302 case BPF_ALU64 | BPF_ADD | BPF_K:
1303 if (is_12b_int(imm)) {
1304 emit_addi(rd, rd, imm, ctx);
1305 } else {
1306 emit_imm(RV_REG_T1, imm, ctx);
1307 emit_add(rd, rd, RV_REG_T1, ctx);
1309 if (!is64 && !aux->verifier_zext)
1310 emit_zextw(rd, rd, ctx);
1311 break;
1312 case BPF_ALU | BPF_SUB | BPF_K:
1313 case BPF_ALU64 | BPF_SUB | BPF_K:
1314 if (is_12b_int(-imm)) {
1315 emit_addi(rd, rd, -imm, ctx);
1316 } else {
1317 emit_imm(RV_REG_T1, imm, ctx);
1318 emit_sub(rd, rd, RV_REG_T1, ctx);
1320 if (!is64 && !aux->verifier_zext)
1321 emit_zextw(rd, rd, ctx);
1322 break;
1323 case BPF_ALU | BPF_AND | BPF_K:
1324 case BPF_ALU64 | BPF_AND | BPF_K:
1325 if (is_12b_int(imm)) {
1326 emit_andi(rd, rd, imm, ctx);
1327 } else {
1328 emit_imm(RV_REG_T1, imm, ctx);
1329 emit_and(rd, rd, RV_REG_T1, ctx);
1331 if (!is64 && !aux->verifier_zext)
1332 emit_zextw(rd, rd, ctx);
1333 break;
1334 case BPF_ALU | BPF_OR | BPF_K:
1335 case BPF_ALU64 | BPF_OR | BPF_K:
1336 if (is_12b_int(imm)) {
1337 emit(rv_ori(rd, rd, imm), ctx);
1338 } else {
1339 emit_imm(RV_REG_T1, imm, ctx);
1340 emit_or(rd, rd, RV_REG_T1, ctx);
1342 if (!is64 && !aux->verifier_zext)
1343 emit_zextw(rd, rd, ctx);
1344 break;
1345 case BPF_ALU | BPF_XOR | BPF_K:
1346 case BPF_ALU64 | BPF_XOR | BPF_K:
1347 if (is_12b_int(imm)) {
1348 emit(rv_xori(rd, rd, imm), ctx);
1349 } else {
1350 emit_imm(RV_REG_T1, imm, ctx);
1351 emit_xor(rd, rd, RV_REG_T1, ctx);
1353 if (!is64 && !aux->verifier_zext)
1354 emit_zextw(rd, rd, ctx);
1355 break;
1356 case BPF_ALU | BPF_MUL | BPF_K:
1357 case BPF_ALU64 | BPF_MUL | BPF_K:
1358 emit_imm(RV_REG_T1, imm, ctx);
1359 emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
1360 rv_mulw(rd, rd, RV_REG_T1), ctx);
1361 if (!is64 && !aux->verifier_zext)
1362 emit_zextw(rd, rd, ctx);
1363 break;
1364 case BPF_ALU | BPF_DIV | BPF_K:
1365 case BPF_ALU64 | BPF_DIV | BPF_K:
1366 emit_imm(RV_REG_T1, imm, ctx);
1367 if (off)
1368 emit(is64 ? rv_div(rd, rd, RV_REG_T1) :
1369 rv_divw(rd, rd, RV_REG_T1), ctx);
1370 else
1371 emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
1372 rv_divuw(rd, rd, RV_REG_T1), ctx);
1373 if (!is64 && !aux->verifier_zext)
1374 emit_zextw(rd, rd, ctx);
1375 break;
1376 case BPF_ALU | BPF_MOD | BPF_K:
1377 case BPF_ALU64 | BPF_MOD | BPF_K:
1378 emit_imm(RV_REG_T1, imm, ctx);
1379 if (off)
1380 emit(is64 ? rv_rem(rd, rd, RV_REG_T1) :
1381 rv_remw(rd, rd, RV_REG_T1), ctx);
1382 else
1383 emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
1384 rv_remuw(rd, rd, RV_REG_T1), ctx);
1385 if (!is64 && !aux->verifier_zext)
1386 emit_zextw(rd, rd, ctx);
1387 break;
1388 case BPF_ALU | BPF_LSH | BPF_K:
1389 case BPF_ALU64 | BPF_LSH | BPF_K:
1390 emit_slli(rd, rd, imm, ctx);
1392 if (!is64 && !aux->verifier_zext)
1393 emit_zextw(rd, rd, ctx);
1394 break;
1395 case BPF_ALU | BPF_RSH | BPF_K:
1396 case BPF_ALU64 | BPF_RSH | BPF_K:
1397 if (is64)
1398 emit_srli(rd, rd, imm, ctx);
1399 else
1400 emit(rv_srliw(rd, rd, imm), ctx);
1402 if (!is64 && !aux->verifier_zext)
1403 emit_zextw(rd, rd, ctx);
1404 break;
1405 case BPF_ALU | BPF_ARSH | BPF_K:
1406 case BPF_ALU64 | BPF_ARSH | BPF_K:
1407 if (is64)
1408 emit_srai(rd, rd, imm, ctx);
1409 else
1410 emit(rv_sraiw(rd, rd, imm), ctx);
1412 if (!is64 && !aux->verifier_zext)
1413 emit_zextw(rd, rd, ctx);
1414 break;
1416 /* JUMP off */
1417 case BPF_JMP | BPF_JA:
1418 case BPF_JMP32 | BPF_JA:
1419 if (BPF_CLASS(code) == BPF_JMP)
1420 rvoff = rv_offset(i, off, ctx);
1421 else
1422 rvoff = rv_offset(i, imm, ctx);
1423 ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
1424 if (ret)
1425 return ret;
1426 break;
1428 /* IF (dst COND src) JUMP off */
1429 case BPF_JMP | BPF_JEQ | BPF_X:
1430 case BPF_JMP32 | BPF_JEQ | BPF_X:
1431 case BPF_JMP | BPF_JGT | BPF_X:
1432 case BPF_JMP32 | BPF_JGT | BPF_X:
1433 case BPF_JMP | BPF_JLT | BPF_X:
1434 case BPF_JMP32 | BPF_JLT | BPF_X:
1435 case BPF_JMP | BPF_JGE | BPF_X:
1436 case BPF_JMP32 | BPF_JGE | BPF_X:
1437 case BPF_JMP | BPF_JLE | BPF_X:
1438 case BPF_JMP32 | BPF_JLE | BPF_X:
1439 case BPF_JMP | BPF_JNE | BPF_X:
1440 case BPF_JMP32 | BPF_JNE | BPF_X:
1441 case BPF_JMP | BPF_JSGT | BPF_X:
1442 case BPF_JMP32 | BPF_JSGT | BPF_X:
1443 case BPF_JMP | BPF_JSLT | BPF_X:
1444 case BPF_JMP32 | BPF_JSLT | BPF_X:
1445 case BPF_JMP | BPF_JSGE | BPF_X:
1446 case BPF_JMP32 | BPF_JSGE | BPF_X:
1447 case BPF_JMP | BPF_JSLE | BPF_X:
1448 case BPF_JMP32 | BPF_JSLE | BPF_X:
1449 case BPF_JMP | BPF_JSET | BPF_X:
1450 case BPF_JMP32 | BPF_JSET | BPF_X:
1451 rvoff = rv_offset(i, off, ctx);
1452 if (!is64) {
1453 s = ctx->ninsns;
1454 if (is_signed_bpf_cond(BPF_OP(code))) {
1455 emit_sextw_alt(&rs, RV_REG_T1, ctx);
1456 emit_sextw_alt(&rd, RV_REG_T2, ctx);
1457 } else {
1458 emit_zextw_alt(&rs, RV_REG_T1, ctx);
1459 emit_zextw_alt(&rd, RV_REG_T2, ctx);
1461 e = ctx->ninsns;
1463 /* Adjust for extra insns */
1464 rvoff -= ninsns_rvoff(e - s);
1467 if (BPF_OP(code) == BPF_JSET) {
1468 /* Adjust for and */
1469 rvoff -= 4;
1470 emit_and(RV_REG_T1, rd, rs, ctx);
1471 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
1472 } else {
1473 emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
1475 break;
1477 /* IF (dst COND imm) JUMP off */
1478 case BPF_JMP | BPF_JEQ | BPF_K:
1479 case BPF_JMP32 | BPF_JEQ | BPF_K:
1480 case BPF_JMP | BPF_JGT | BPF_K:
1481 case BPF_JMP32 | BPF_JGT | BPF_K:
1482 case BPF_JMP | BPF_JLT | BPF_K:
1483 case BPF_JMP32 | BPF_JLT | BPF_K:
1484 case BPF_JMP | BPF_JGE | BPF_K:
1485 case BPF_JMP32 | BPF_JGE | BPF_K:
1486 case BPF_JMP | BPF_JLE | BPF_K:
1487 case BPF_JMP32 | BPF_JLE | BPF_K:
1488 case BPF_JMP | BPF_JNE | BPF_K:
1489 case BPF_JMP32 | BPF_JNE | BPF_K:
1490 case BPF_JMP | BPF_JSGT | BPF_K:
1491 case BPF_JMP32 | BPF_JSGT | BPF_K:
1492 case BPF_JMP | BPF_JSLT | BPF_K:
1493 case BPF_JMP32 | BPF_JSLT | BPF_K:
1494 case BPF_JMP | BPF_JSGE | BPF_K:
1495 case BPF_JMP32 | BPF_JSGE | BPF_K:
1496 case BPF_JMP | BPF_JSLE | BPF_K:
1497 case BPF_JMP32 | BPF_JSLE | BPF_K:
1498 rvoff = rv_offset(i, off, ctx);
1499 s = ctx->ninsns;
1500 if (imm)
1501 emit_imm(RV_REG_T1, imm, ctx);
1502 rs = imm ? RV_REG_T1 : RV_REG_ZERO;
1503 if (!is64) {
1504 if (is_signed_bpf_cond(BPF_OP(code))) {
1505 emit_sextw_alt(&rd, RV_REG_T2, ctx);
1506 /* rs has been sign extended */
1507 } else {
1508 emit_zextw_alt(&rd, RV_REG_T2, ctx);
1509 if (imm)
1510 emit_zextw(rs, rs, ctx);
1513 e = ctx->ninsns;
1515 /* Adjust for extra insns */
1516 rvoff -= ninsns_rvoff(e - s);
1517 emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
1518 break;
1520 case BPF_JMP | BPF_JSET | BPF_K:
1521 case BPF_JMP32 | BPF_JSET | BPF_K:
1522 rvoff = rv_offset(i, off, ctx);
1523 s = ctx->ninsns;
1524 if (is_12b_int(imm)) {
1525 emit_andi(RV_REG_T1, rd, imm, ctx);
1526 } else {
1527 emit_imm(RV_REG_T1, imm, ctx);
1528 emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
1530 /* For jset32, we should clear the upper 32 bits of t1, but
1531 * sign-extension is sufficient here and saves one instruction,
1532 * as t1 is used only in comparison against zero.
1534 if (!is64 && imm < 0)
1535 emit_sextw(RV_REG_T1, RV_REG_T1, ctx);
1536 e = ctx->ninsns;
1537 rvoff -= ninsns_rvoff(e - s);
1538 emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
1539 break;
1541 /* function call */
1542 case BPF_JMP | BPF_CALL:
1544 bool fixed_addr;
1545 u64 addr;
1547 /* Inline calls to bpf_get_smp_processor_id()
1549 * RV_REG_TP holds the address of the current CPU's task_struct and thread_info is
1550 * at offset 0 in task_struct.
1551 * Load cpu from thread_info:
1552 * Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu
1554 * This replicates the implementation of raw_smp_processor_id() on RISC-V
1556 if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
1557 /* Load current CPU number in R0 */
1558 emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu),
1559 RV_REG_TP, ctx);
1560 break;
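/*
 * With the regmap above (BPF_REG_0 -> a5) the whole helper call collapses
 * to a single load, roughly "ld a5, offsetof(struct thread_info, cpu)(tp)",
 * which is the same location raw_smp_processor_id() reads.
 */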
1563 mark_call(ctx);
1564 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
1565 &addr, &fixed_addr);
1566 if (ret < 0)
1567 return ret;
1569 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
1570 const struct btf_func_model *fm;
1571 int idx;
1573 fm = bpf_jit_find_kfunc_model(ctx->prog, insn);
1574 if (!fm)
1575 return -EINVAL;
1577 for (idx = 0; idx < fm->nr_args; idx++) {
1578 u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx);
1580 if (fm->arg_size[idx] == sizeof(int))
1581 emit_sextw(reg, reg, ctx);
1585 ret = emit_call(addr, fixed_addr, ctx);
1586 if (ret)
1587 return ret;
1589 if (insn->src_reg != BPF_PSEUDO_CALL)
1590 emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx);
1591 break;
1593 /* tail call */
1594 case BPF_JMP | BPF_TAIL_CALL:
1595 if (emit_bpf_tail_call(i, ctx))
1596 return -1;
1597 break;
1599 /* function return */
1600 case BPF_JMP | BPF_EXIT:
1601 if (i == ctx->prog->len - 1)
1602 break;
1604 rvoff = epilogue_offset(ctx);
1605 ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
1606 if (ret)
1607 return ret;
1608 break;
1610 /* dst = imm64 */
1611 case BPF_LD | BPF_IMM | BPF_DW:
1613 struct bpf_insn insn1 = insn[1];
1614 u64 imm64;
1616 imm64 = (u64)insn1.imm << 32 | (u32)imm;
1617 if (bpf_pseudo_func(insn)) {
1618 /* fixed-length insns for extra jit pass */
1619 ret = emit_addr(rd, imm64, extra_pass, ctx);
1620 if (ret)
1621 return ret;
1622 } else {
1623 emit_imm(rd, imm64, ctx);
1626 return 1;
1629 /* LDX: dst = *(unsigned size *)(src + off) */
1630 case BPF_LDX | BPF_MEM | BPF_B:
1631 case BPF_LDX | BPF_MEM | BPF_H:
1632 case BPF_LDX | BPF_MEM | BPF_W:
1633 case BPF_LDX | BPF_MEM | BPF_DW:
1634 case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1635 case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1636 case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1637 case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1638 /* LDSX: dst = *(signed size *)(src + off) */
1639 case BPF_LDX | BPF_MEMSX | BPF_B:
1640 case BPF_LDX | BPF_MEMSX | BPF_H:
1641 case BPF_LDX | BPF_MEMSX | BPF_W:
1642 case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1643 case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1644 case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1645 /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + RV_REG_ARENA + off) */
1646 case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
1647 case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
1648 case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
1649 case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
1651 int insn_len, insns_start;
1652 bool sign_ext;
1654 sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
1655 BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
1657 if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
1658 emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx);
1659 rs = RV_REG_T2;
1662 switch (BPF_SIZE(code)) {
1663 case BPF_B:
1664 if (is_12b_int(off)) {
1665 insns_start = ctx->ninsns;
1666 if (sign_ext)
1667 emit(rv_lb(rd, off, rs), ctx);
1668 else
1669 emit(rv_lbu(rd, off, rs), ctx);
1670 insn_len = ctx->ninsns - insns_start;
1671 break;
1674 emit_imm(RV_REG_T1, off, ctx);
1675 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
1676 insns_start = ctx->ninsns;
1677 if (sign_ext)
1678 emit(rv_lb(rd, 0, RV_REG_T1), ctx);
1679 else
1680 emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
1681 insn_len = ctx->ninsns - insns_start;
1682 break;
1683 case BPF_H:
1684 if (is_12b_int(off)) {
1685 insns_start = ctx->ninsns;
1686 if (sign_ext)
1687 emit(rv_lh(rd, off, rs), ctx);
1688 else
1689 emit(rv_lhu(rd, off, rs), ctx);
1690 insn_len = ctx->ninsns - insns_start;
1691 break;
1694 emit_imm(RV_REG_T1, off, ctx);
1695 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
1696 insns_start = ctx->ninsns;
1697 if (sign_ext)
1698 emit(rv_lh(rd, 0, RV_REG_T1), ctx);
1699 else
1700 emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
1701 insn_len = ctx->ninsns - insns_start;
1702 break;
1703 case BPF_W:
1704 if (is_12b_int(off)) {
1705 insns_start = ctx->ninsns;
1706 if (sign_ext)
1707 emit(rv_lw(rd, off, rs), ctx);
1708 else
1709 emit(rv_lwu(rd, off, rs), ctx);
1710 insn_len = ctx->ninsns - insns_start;
1711 break;
1714 emit_imm(RV_REG_T1, off, ctx);
1715 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
1716 insns_start = ctx->ninsns;
1717 if (sign_ext)
1718 emit(rv_lw(rd, 0, RV_REG_T1), ctx);
1719 else
1720 emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
1721 insn_len = ctx->ninsns - insns_start;
1722 break;
1723 case BPF_DW:
1724 if (is_12b_int(off)) {
1725 insns_start = ctx->ninsns;
1726 emit_ld(rd, off, rs, ctx);
1727 insn_len = ctx->ninsns - insns_start;
1728 break;
1731 emit_imm(RV_REG_T1, off, ctx);
1732 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
1733 insns_start = ctx->ninsns;
1734 emit_ld(rd, 0, RV_REG_T1, ctx);
1735 insn_len = ctx->ninsns - insns_start;
1736 break;
1739 ret = add_exception_handler(insn, ctx, rd, insn_len);
1740 if (ret)
1741 return ret;
1743 if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1]))
1744 return 1;
1745 break;
1747 /* speculation barrier */
1748 case BPF_ST | BPF_NOSPEC:
1749 break;
1751 /* ST: *(size *)(dst + off) = imm */
1752 case BPF_ST | BPF_MEM | BPF_B:
1753 emit_imm(RV_REG_T1, imm, ctx);
1754 if (is_12b_int(off)) {
1755 emit(rv_sb(rd, off, RV_REG_T1), ctx);
1756 break;
1759 emit_imm(RV_REG_T2, off, ctx);
1760 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1761 emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
1762 break;
1764 case BPF_ST | BPF_MEM | BPF_H:
1765 emit_imm(RV_REG_T1, imm, ctx);
1766 if (is_12b_int(off)) {
1767 emit(rv_sh(rd, off, RV_REG_T1), ctx);
1768 break;
1771 emit_imm(RV_REG_T2, off, ctx);
1772 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1773 emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
1774 break;
1775 case BPF_ST | BPF_MEM | BPF_W:
1776 emit_imm(RV_REG_T1, imm, ctx);
1777 if (is_12b_int(off)) {
1778 emit_sw(rd, off, RV_REG_T1, ctx);
1779 break;
1782 emit_imm(RV_REG_T2, off, ctx);
1783 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1784 emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
1785 break;
1786 case BPF_ST | BPF_MEM | BPF_DW:
1787 emit_imm(RV_REG_T1, imm, ctx);
1788 if (is_12b_int(off)) {
1789 emit_sd(rd, off, RV_REG_T1, ctx);
1790 break;
1793 emit_imm(RV_REG_T2, off, ctx);
1794 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1795 emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
1796 break;
1798 case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
1799 case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
1800 case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
1801 case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
1803 int insn_len, insns_start;
1805 emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx);
1806 rd = RV_REG_T3;
1808 /* Load imm to a register then store it */
1809 emit_imm(RV_REG_T1, imm, ctx);
1811 switch (BPF_SIZE(code)) {
1812 case BPF_B:
1813 if (is_12b_int(off)) {
1814 insns_start = ctx->ninsns;
1815 emit(rv_sb(rd, off, RV_REG_T1), ctx);
1816 insn_len = ctx->ninsns - insns_start;
1817 break;
1820 emit_imm(RV_REG_T2, off, ctx);
1821 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1822 insns_start = ctx->ninsns;
1823 emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
1824 insn_len = ctx->ninsns - insns_start;
1825 break;
1826 case BPF_H:
1827 if (is_12b_int(off)) {
1828 insns_start = ctx->ninsns;
1829 emit(rv_sh(rd, off, RV_REG_T1), ctx);
1830 insn_len = ctx->ninsns - insns_start;
1831 break;
1834 emit_imm(RV_REG_T2, off, ctx);
1835 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1836 insns_start = ctx->ninsns;
1837 emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
1838 insn_len = ctx->ninsns - insns_start;
1839 break;
1840 case BPF_W:
1841 if (is_12b_int(off)) {
1842 insns_start = ctx->ninsns;
1843 emit_sw(rd, off, RV_REG_T1, ctx);
1844 insn_len = ctx->ninsns - insns_start;
1845 break;
1848 emit_imm(RV_REG_T2, off, ctx);
1849 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1850 insns_start = ctx->ninsns;
1851 emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
1852 insn_len = ctx->ninsns - insns_start;
1853 break;
1854 case BPF_DW:
1855 if (is_12b_int(off)) {
1856 insns_start = ctx->ninsns;
1857 emit_sd(rd, off, RV_REG_T1, ctx);
1858 insn_len = ctx->ninsns - insns_start;
1859 break;
1862 emit_imm(RV_REG_T2, off, ctx);
1863 emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
1864 insns_start = ctx->ninsns;
1865 emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
1866 insn_len = ctx->ninsns - insns_start;
1867 break;
1870 ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER,
1871 insn_len);
1872 if (ret)
1873 return ret;
1875 break;
1878 /* STX: *(size *)(dst + off) = src */
1879 case BPF_STX | BPF_MEM | BPF_B:
1880 if (is_12b_int(off)) {
1881 emit(rv_sb(rd, off, rs), ctx);
1882 break;
1885 emit_imm(RV_REG_T1, off, ctx);
1886 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1887 emit(rv_sb(RV_REG_T1, 0, rs), ctx);
1888 break;
1889 case BPF_STX | BPF_MEM | BPF_H:
1890 if (is_12b_int(off)) {
1891 emit(rv_sh(rd, off, rs), ctx);
1892 break;
1895 emit_imm(RV_REG_T1, off, ctx);
1896 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1897 emit(rv_sh(RV_REG_T1, 0, rs), ctx);
1898 break;
1899 case BPF_STX | BPF_MEM | BPF_W:
1900 if (is_12b_int(off)) {
1901 emit_sw(rd, off, rs, ctx);
1902 break;
1905 emit_imm(RV_REG_T1, off, ctx);
1906 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1907 emit_sw(RV_REG_T1, 0, rs, ctx);
1908 break;
1909 case BPF_STX | BPF_MEM | BPF_DW:
1910 if (is_12b_int(off)) {
1911 emit_sd(rd, off, rs, ctx);
1912 break;
1915 emit_imm(RV_REG_T1, off, ctx);
1916 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1917 emit_sd(RV_REG_T1, 0, rs, ctx);
1918 break;
1919 case BPF_STX | BPF_ATOMIC | BPF_W:
1920 case BPF_STX | BPF_ATOMIC | BPF_DW:
1921 emit_atomic(rd, rs, off, imm,
1922 BPF_SIZE(code) == BPF_DW, ctx);
1923 break;
1925 case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
1926 case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
1927 case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
1928 case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
1930 int insn_len, insns_start;
1932 emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
1933 rd = RV_REG_T2;
1935 switch (BPF_SIZE(code)) {
1936 case BPF_B:
1937 if (is_12b_int(off)) {
1938 insns_start = ctx->ninsns;
1939 emit(rv_sb(rd, off, rs), ctx);
1940 insn_len = ctx->ninsns - insns_start;
1941 break;
1944 emit_imm(RV_REG_T1, off, ctx);
1945 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1946 insns_start = ctx->ninsns;
1947 emit(rv_sb(RV_REG_T1, 0, rs), ctx);
1948 insn_len = ctx->ninsns - insns_start;
1949 break;
1950 case BPF_H:
1951 if (is_12b_int(off)) {
1952 insns_start = ctx->ninsns;
1953 emit(rv_sh(rd, off, rs), ctx);
1954 insn_len = ctx->ninsns - insns_start;
1955 break;
1958 emit_imm(RV_REG_T1, off, ctx);
1959 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1960 insns_start = ctx->ninsns;
1961 emit(rv_sh(RV_REG_T1, 0, rs), ctx);
1962 insn_len = ctx->ninsns - insns_start;
1963 break;
1964 case BPF_W:
1965 if (is_12b_int(off)) {
1966 insns_start = ctx->ninsns;
1967 emit_sw(rd, off, rs, ctx);
1968 insn_len = ctx->ninsns - insns_start;
1969 break;
1972 emit_imm(RV_REG_T1, off, ctx);
1973 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1974 insns_start = ctx->ninsns;
1975 emit_sw(RV_REG_T1, 0, rs, ctx);
1976 insn_len = ctx->ninsns - insns_start;
1977 break;
1978 case BPF_DW:
1979 if (is_12b_int(off)) {
1980 insns_start = ctx->ninsns;
1981 emit_sd(rd, off, rs, ctx);
1982 insn_len = ctx->ninsns - insns_start;
1983 break;
1986 emit_imm(RV_REG_T1, off, ctx);
1987 emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
1988 insns_start = ctx->ninsns;
1989 emit_sd(RV_REG_T1, 0, rs, ctx);
1990 insn_len = ctx->ninsns - insns_start;
1991 break;
1994 ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER,
1995 insn_len);
1996 if (ret)
1997 return ret;
1999 break;
2002 default:
2003 pr_err("bpf-jit: unknown opcode %02x\n", code);
2004 return -EINVAL;
2007 return 0;
2010 void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog)
2012 int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
2014 bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, STACK_ALIGN);
2015 if (bpf_stack_adjust)
2016 mark_fp(ctx);
2018 if (seen_reg(RV_REG_RA, ctx))
2019 stack_adjust += 8;
2020 stack_adjust += 8; /* RV_REG_FP */
2021 if (seen_reg(RV_REG_S1, ctx))
2022 stack_adjust += 8;
2023 if (seen_reg(RV_REG_S2, ctx))
2024 stack_adjust += 8;
2025 if (seen_reg(RV_REG_S3, ctx))
2026 stack_adjust += 8;
2027 if (seen_reg(RV_REG_S4, ctx))
2028 stack_adjust += 8;
2029 if (seen_reg(RV_REG_S5, ctx))
2030 stack_adjust += 8;
2031 if (seen_reg(RV_REG_S6, ctx))
2032 stack_adjust += 8;
2033 if (ctx->arena_vm_start)
2034 stack_adjust += 8;
2036 stack_adjust = round_up(stack_adjust, STACK_ALIGN);
2037 stack_adjust += bpf_stack_adjust;
2039 store_offset = stack_adjust - 8;
2041 /* emit kcfi type preamble immediately before the first insn */
2042 emit_kcfi(is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash, ctx);
2044 /* nops reserved for auipc+jalr pair */
2045 for (i = 0; i < RV_FENTRY_NINSNS; i++)
2046 emit(rv_nop(), ctx);
2048 /* First instruction is always setting the tail-call-counter
2049 * (TCC) register. This instruction is skipped for tail calls.
2050 * Force using a 4-byte (non-compressed) instruction.
2052 emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);
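/*
 * Together with the fentry nops above, every program therefore starts
 * with a fixed 12-byte header, roughly:
 *	nop; nop; addi a6, zero, MAX_TAIL_CALL_CNT
 * Normal calls enter at the first nop (so the nops can later be patched
 * by bpf_arch_text_poke()), while tail calls jump 12 bytes past it and
 * inherit the caller's tail-call count in a6 (see __build_epilogue()).
 */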
2054 emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);
2056 if (seen_reg(RV_REG_RA, ctx)) {
2057 emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
2058 store_offset -= 8;
2060 emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
2061 store_offset -= 8;
2062 if (seen_reg(RV_REG_S1, ctx)) {
2063 emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
2064 store_offset -= 8;
2066 if (seen_reg(RV_REG_S2, ctx)) {
2067 emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
2068 store_offset -= 8;
2070 if (seen_reg(RV_REG_S3, ctx)) {
2071 emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
2072 store_offset -= 8;
2074 if (seen_reg(RV_REG_S4, ctx)) {
2075 emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
2076 store_offset -= 8;
2078 if (seen_reg(RV_REG_S5, ctx)) {
2079 emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
2080 store_offset -= 8;
2082 if (seen_reg(RV_REG_S6, ctx)) {
2083 emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
2084 store_offset -= 8;
2086 if (ctx->arena_vm_start) {
2087 emit_sd(RV_REG_SP, store_offset, RV_REG_ARENA, ctx);
2088 store_offset -= 8;
2091 emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);
2093 if (bpf_stack_adjust)
2094 emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);
2096 /* Program contains calls and tail calls, so RV_REG_TCC needs
2097 * to be saved across calls.
2099 if (seen_tail_call(ctx) && seen_call(ctx))
2100 emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);
2102 ctx->stack_size = stack_adjust;
2104 if (ctx->arena_vm_start)
2105 emit_imm(RV_REG_ARENA, ctx->arena_vm_start, ctx);
2108 void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
2110 __build_epilogue(false, ctx);
2113 bool bpf_jit_supports_kfunc_call(void)
2115 return true;
2118 bool bpf_jit_supports_ptr_xchg(void)
2120 return true;
2123 bool bpf_jit_supports_arena(void)
2125 return true;
2128 bool bpf_jit_supports_percpu_insn(void)
2130 return true;
2133 bool bpf_jit_inlines_helper_call(s32 imm)
2135 switch (imm) {
2136 case BPF_FUNC_get_smp_processor_id:
2137 return true;
2138 default:
2139 return false;