Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / arch / parisc / net / bpf_jit_comp32.c
blob5ff0cf925fe953d09b1339c5d78aad37bbf33dce
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * BPF JIT compiler for PA-RISC (32-bit)
5 * Copyright (c) 2023 Helge Deller <deller@gmx.de>
7 * The code is based on the BPF JIT compiler for RV64 by Björn Töpel and
8 * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan.
9 */
11 #include <linux/bpf.h>
12 #include <linux/filter.h>
13 #include <linux/libgcc.h>
14 #include "bpf_jit.h"
/*
 * Stack layout during BPF program execution (note: stack grows up):
 *
 *                     high
 *   HPPA32 sp =>  +----------+ <= HPPA32 fp
 *                 | saved sp |
 *                 | saved rp |
 *                 |   ...    | HPPA32 callee-saved registers
 *                 | curr args|
 *                 | local var|
 *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS)
 *                 |  lo(R9)  |
 *                 |  hi(R9)  |
 *                 |  lo(FP)  | JIT scratch space for BPF registers
 *                 |  hi(FP)  |
 *                 |   ...    |
 *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS
 *                 |          |        - 4 * BPF_JIT_SCRATCH_REGS)
 *                 |          |
 *                 |   ...    | BPF program stack
 *                 |          |
 *                 |   ...    | Function call stack
 *                 |          |
 *                 +----------+
 *                     low
 */
/*
 * Slot indices inside the JIT scratch space, counted from its top.
 * Every BPF register that lives on the stack owns one HI and one LO
 * 32-bit slot; STACK_OFFSET() turns an index into an sp-relative slot.
 */
enum {
	BPF_R8_HI = 0,
	BPF_R8_LO = 1,
	BPF_R9_HI = 2,
	BPF_R9_LO = 3,
	BPF_FP_HI = 4,
	BPF_FP_LO = 5,
	BPF_AX_HI = 6,
	BPF_AX_LO = 7,
	BPF_R0_TEMP_HI = 8,
	BPF_R0_TEMP_LO = 9,
	/* Total number of scratch slots; must remain the last entry. */
	BPF_JIT_SCRATCH_REGS = 10,
};
/*
 * Number of 32-bit slots reserved at the top of the frame: the callee-saved
 * registers r3-r18 (16 slots) plus 8 further slots. The epilogue in this
 * file reloads r3-r18 from slots 8..23 below sp, and the saved sp, the exit
 * pointer and the original rp from fixed offsets inside the first 8 slots.
 */
#define NR_SAVED_REGISTERS	(18 - 3 + 1 + 8)

/*
 * Negative slot index of JIT scratch slot @k. Users in this file multiply
 * the value by REG_SIZE and address it relative to HPPA_REG_SP. The argument
 * is parenthesized so that any expression may be passed safely.
 */
#define STACK_OFFSET(k)		(-(NR_SAVED_REGISTERS + (k) + 1))
#define STACK_ALIGN		FRAME_SIZE

/* Access to the exit function pointer kept at a fixed slot below sp. */
#define EXIT_PTR_LOAD(reg)	hppa_ldw(-0x08, HPPA_REG_SP, (reg))
#define EXIT_PTR_STORE(reg)	hppa_stw((reg), -0x08, HPPA_REG_SP)
#define EXIT_PTR_JUMP(reg, nop)	hppa_bv(HPPA_REG_ZERO, (reg), (nop))

/* JIT-internal pseudo registers appended after the BPF register set. */
#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
#define TMP_REG_R0	(MAX_BPF_JIT_REG + 2)
73 static const s8 regmap[][2] = {
74 /* Return value from in-kernel function, and exit value from eBPF. */
75 [BPF_REG_0] = {HPPA_REG_RET0, HPPA_REG_RET1}, /* HI/LOW */
77 /* Arguments from eBPF program to in-kernel function. */
78 [BPF_REG_1] = {HPPA_R(3), HPPA_R(4)},
79 [BPF_REG_2] = {HPPA_R(5), HPPA_R(6)},
80 [BPF_REG_3] = {HPPA_R(7), HPPA_R(8)},
81 [BPF_REG_4] = {HPPA_R(9), HPPA_R(10)},
82 [BPF_REG_5] = {HPPA_R(11), HPPA_R(12)},
84 [BPF_REG_6] = {HPPA_R(13), HPPA_R(14)},
85 [BPF_REG_7] = {HPPA_R(15), HPPA_R(16)},
87 * Callee-saved registers that in-kernel function will preserve.
88 * Stored on the stack.
90 [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
91 [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
93 /* Read-only frame pointer to access BPF stack. Not needed. */
94 [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},
96 /* Temporary register for blinding constants. Stored on the stack. */
97 [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
99 * Temporary registers used by the JIT to operate on registers stored
100 * on the stack. Save t0 and t1 to be used as temporaries in generated
101 * code.
103 [TMP_REG_1] = {HPPA_REG_T3, HPPA_REG_T2},
104 [TMP_REG_2] = {HPPA_REG_T5, HPPA_REG_T4},
106 /* temporary space for BPF_R0 during libgcc and millicode calls */
107 [TMP_REG_R0] = {STACK_OFFSET(BPF_R0_TEMP_HI), STACK_OFFSET(BPF_R0_TEMP_LO)},
110 static s8 hi(const s8 *r)
112 return r[0];
115 static s8 lo(const s8 *r)
117 return r[1];
120 static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
122 REG_SET_SEEN(ctx, rd);
123 if (OPTIMIZE_HPPA && (rs == rd))
124 return;
125 REG_SET_SEEN(ctx, rs);
126 emit(hppa_copy(rs, rd), ctx);
129 static void emit_hppa_xor(const s8 r1, const s8 r2, const s8 r3, struct hppa_jit_context *ctx)
131 REG_SET_SEEN(ctx, r1);
132 REG_SET_SEEN(ctx, r2);
133 REG_SET_SEEN(ctx, r3);
134 if (OPTIMIZE_HPPA && (r1 == r2)) {
135 emit(hppa_copy(HPPA_REG_ZERO, r3), ctx);
136 } else {
137 emit(hppa_xor(r1, r2, r3), ctx);
141 static void emit_imm(const s8 rd, s32 imm, struct hppa_jit_context *ctx)
143 u32 lower = im11(imm);
145 REG_SET_SEEN(ctx, rd);
146 if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) {
147 emit(hppa_ldi(imm, rd), ctx);
148 return;
150 emit(hppa_ldil(imm, rd), ctx);
151 if (OPTIMIZE_HPPA && (lower == 0))
152 return;
153 emit(hppa_ldo(lower, rd, rd), ctx);
156 static void emit_imm32(const s8 *rd, s32 imm, struct hppa_jit_context *ctx)
158 /* Emit immediate into lower bits. */
159 REG_SET_SEEN(ctx, lo(rd));
160 emit_imm(lo(rd), imm, ctx);
162 /* Sign-extend into upper bits. */
163 REG_SET_SEEN(ctx, hi(rd));
164 if (imm >= 0)
165 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
166 else
167 emit(hppa_ldi(-1, hi(rd)), ctx);
170 static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo,
171 struct hppa_jit_context *ctx)
173 emit_imm(hi(rd), imm_hi, ctx);
174 emit_imm(lo(rd), imm_lo, ctx);
177 static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx)
179 const s8 *r0 = regmap[BPF_REG_0];
180 int i;
182 if (is_tail_call) {
184 * goto *(t0 + 4);
185 * Skips first instruction of prologue which initializes tail
186 * call counter. Assumes t0 contains address of target program,
187 * see emit_bpf_tail_call.
189 emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx);
190 emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx);
191 /* in delay slot: */
192 emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx);
194 return;
197 /* load epilogue function pointer and jump to it. */
198 /* exit point is either directly below, or the outest TCC exit function */
199 emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
200 emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
202 /* NOTE: we are 32-bit and big-endian, so return lower 32-bit value */
203 emit_hppa_copy(lo(r0), HPPA_REG_RET0, ctx);
205 /* Restore callee-saved registers. */
206 for (i = 3; i <= 18; i++) {
207 if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
208 continue;
209 emit(hppa_ldw(-REG_SIZE * (8 + (i-3)), HPPA_REG_SP, HPPA_R(i)), ctx);
212 /* load original return pointer (stored by outest TCC function) */
213 emit(hppa_ldw(-0x14, HPPA_REG_SP, HPPA_REG_RP), ctx);
214 emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx);
215 /* in delay slot: */
216 emit(hppa_ldw(-0x04, HPPA_REG_SP, HPPA_REG_SP), ctx);
219 static bool is_stacked(s8 reg)
221 return reg < 0;
224 static const s8 *bpf_get_reg64_offset(const s8 *reg, const s8 *tmp,
225 u16 offset_sp, struct hppa_jit_context *ctx)
227 if (is_stacked(hi(reg))) {
228 emit(hppa_ldw(REG_SIZE * hi(reg) - offset_sp, HPPA_REG_SP, hi(tmp)), ctx);
229 emit(hppa_ldw(REG_SIZE * lo(reg) - offset_sp, HPPA_REG_SP, lo(tmp)), ctx);
230 reg = tmp;
232 REG_SET_SEEN(ctx, hi(reg));
233 REG_SET_SEEN(ctx, lo(reg));
234 return reg;
237 static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp,
238 struct hppa_jit_context *ctx)
240 return bpf_get_reg64_offset(reg, tmp, 0, ctx);
243 static const s8 *bpf_get_reg64_ref(const s8 *reg, const s8 *tmp,
244 bool must_load, struct hppa_jit_context *ctx)
246 if (!OPTIMIZE_HPPA)
247 return bpf_get_reg64(reg, tmp, ctx);
249 if (is_stacked(hi(reg))) {
250 if (must_load)
251 emit(hppa_ldw(REG_SIZE * hi(reg), HPPA_REG_SP, hi(tmp)), ctx);
252 reg = tmp;
254 REG_SET_SEEN(ctx, hi(reg));
255 REG_SET_SEEN(ctx, lo(reg));
256 return reg;
260 static void bpf_put_reg64(const s8 *reg, const s8 *src,
261 struct hppa_jit_context *ctx)
263 if (is_stacked(hi(reg))) {
264 emit(hppa_stw(hi(src), REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
265 emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
269 static void bpf_save_R0(struct hppa_jit_context *ctx)
271 bpf_put_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
274 static void bpf_restore_R0(struct hppa_jit_context *ctx)
276 bpf_get_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
280 static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp,
281 struct hppa_jit_context *ctx)
283 if (is_stacked(lo(reg))) {
284 emit(hppa_ldw(REG_SIZE * lo(reg), HPPA_REG_SP, lo(tmp)), ctx);
285 reg = tmp;
287 REG_SET_SEEN(ctx, lo(reg));
288 return reg;
291 static const s8 *bpf_get_reg32_ref(const s8 *reg, const s8 *tmp,
292 struct hppa_jit_context *ctx)
294 if (!OPTIMIZE_HPPA)
295 return bpf_get_reg32(reg, tmp, ctx);
297 if (is_stacked(hi(reg))) {
298 reg = tmp;
300 REG_SET_SEEN(ctx, lo(reg));
301 return reg;
304 static void bpf_put_reg32(const s8 *reg, const s8 *src,
305 struct hppa_jit_context *ctx)
307 if (is_stacked(lo(reg))) {
308 REG_SET_SEEN(ctx, lo(src));
309 emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
310 if (1 && !ctx->prog->aux->verifier_zext) {
311 REG_SET_SEEN(ctx, hi(reg));
312 emit(hppa_stw(HPPA_REG_ZERO, REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
314 } else if (1 && !ctx->prog->aux->verifier_zext) {
315 REG_SET_SEEN(ctx, hi(reg));
316 emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
320 /* extern hppa millicode functions */
321 extern void $$mulI(void);
322 extern void $$divU(void);
323 extern void $$remU(void);
325 static void emit_call_millicode(void *func, const s8 arg0,
326 const s8 arg1, u8 opcode, struct hppa_jit_context *ctx)
328 u32 func_addr;
330 emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx);
331 emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx);
333 /* libcgcc overwrites HPPA_REG_RET0/1, save temp. in dest. */
334 if (arg0 != HPPA_REG_RET1)
335 bpf_save_R0(ctx);
337 func_addr = (uintptr_t) dereference_function_descriptor(func);
338 emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);
339 /* skip the following be_l instruction if divisor is zero. */
340 if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
341 if (BPF_OP(opcode) == BPF_DIV)
342 emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
343 else
344 emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
345 emit(hppa_or_cond(HPPA_REG_ARG1, HPPA_REG_ZERO, 1, 0, HPPA_REG_ZERO), ctx);
347 /* Note: millicode functions use r31 as return pointer instead of rp */
348 emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx);
349 emit(hppa_nop(), ctx); /* this nop is needed here for delay slot */
351 /* Note: millicode functions return result in RET1, not RET0 */
352 emit_hppa_copy(HPPA_REG_RET1, arg0, ctx);
354 /* restore HPPA_REG_RET0/1, temp. save in dest. */
355 if (arg0 != HPPA_REG_RET1)
356 bpf_restore_R0(ctx);
359 static void emit_call_libgcc_ll(void *func, const s8 *arg0,
360 const s8 *arg1, u8 opcode, struct hppa_jit_context *ctx)
362 u32 func_addr;
364 emit_hppa_copy(lo(arg0), HPPA_REG_ARG0, ctx);
365 emit_hppa_copy(hi(arg0), HPPA_REG_ARG1, ctx);
366 emit_hppa_copy(lo(arg1), HPPA_REG_ARG2, ctx);
367 emit_hppa_copy(hi(arg1), HPPA_REG_ARG3, ctx);
369 /* libcgcc overwrites HPPA_REG_RET0/_RET1, so keep copy of R0 on stack */
370 if (hi(arg0) != HPPA_REG_RET0)
371 bpf_save_R0(ctx);
373 /* prepare stack */
374 emit(hppa_ldo(2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);
376 func_addr = (uintptr_t) dereference_function_descriptor(func);
377 emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);
378 /* zero out the following be_l instruction if divisor is 0 (and set default values) */
379 if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
380 emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx);
381 if (BPF_OP(opcode) == BPF_DIV)
382 emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
383 else
384 emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
385 emit(hppa_or_cond(HPPA_REG_ARG2, HPPA_REG_ARG3, 1, 0, HPPA_REG_ZERO), ctx);
387 emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
388 emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);
390 /* restore stack */
391 emit(hppa_ldo(-2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);
393 emit_hppa_copy(HPPA_REG_RET0, hi(arg0), ctx);
394 emit_hppa_copy(HPPA_REG_RET1, lo(arg0), ctx);
396 /* restore HPPA_REG_RET0/_RET1 */
397 if (hi(arg0) != HPPA_REG_RET0)
398 bpf_restore_R0(ctx);
401 static void emit_jump(s32 paoff, bool force_far,
402 struct hppa_jit_context *ctx)
404 unsigned long pc, addr;
406 /* Note: allocate 2 instructions for jumps if force_far is set. */
407 if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 17)) {
408 /* use BL,short branch followed by nop() */
409 emit(hppa_bl(paoff - HPPA_BRANCH_DISPLACEMENT, HPPA_REG_ZERO), ctx);
410 if (force_far)
411 emit(hppa_nop(), ctx);
412 return;
415 pc = (uintptr_t) &ctx->insns[ctx->ninsns];
416 addr = pc + (paoff * HPPA_INSN_SIZE);
417 emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
418 emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); // be,l,n addr(sr4,r31), %sr0, %r31
421 static void emit_alu_i64(const s8 *dst, s32 imm,
422 struct hppa_jit_context *ctx, const u8 op)
424 const s8 *tmp1 = regmap[TMP_REG_1];
425 const s8 *rd;
427 if (0 && op == BPF_MOV)
428 rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
429 else
430 rd = bpf_get_reg64(dst, tmp1, ctx);
432 /* dst = dst OP imm */
433 switch (op) {
434 case BPF_MOV:
435 emit_imm32(rd, imm, ctx);
436 break;
437 case BPF_AND:
438 emit_imm(HPPA_REG_T0, imm, ctx);
439 emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
440 if (imm >= 0)
441 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
442 break;
443 case BPF_OR:
444 emit_imm(HPPA_REG_T0, imm, ctx);
445 emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
446 if (imm < 0)
447 emit_imm(hi(rd), -1, ctx);
448 break;
449 case BPF_XOR:
450 emit_imm(HPPA_REG_T0, imm, ctx);
451 emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
452 if (imm < 0) {
453 emit_imm(HPPA_REG_T0, -1, ctx);
454 emit_hppa_xor(hi(rd), HPPA_REG_T0, hi(rd), ctx);
456 break;
457 case BPF_LSH:
458 if (imm == 0)
459 break;
460 if (imm > 32) {
461 imm -= 32;
462 emit(hppa_zdep(lo(rd), imm, imm, hi(rd)), ctx);
463 emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
464 } else if (imm == 32) {
465 emit_hppa_copy(lo(rd), hi(rd), ctx);
466 emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
467 } else {
468 emit(hppa_shd(hi(rd), lo(rd), 32 - imm, hi(rd)), ctx);
469 emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
471 break;
472 case BPF_RSH:
473 if (imm == 0)
474 break;
475 if (imm > 32) {
476 imm -= 32;
477 emit(hppa_shr(hi(rd), imm, lo(rd)), ctx);
478 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
479 } else if (imm == 32) {
480 emit_hppa_copy(hi(rd), lo(rd), ctx);
481 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
482 } else {
483 emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
484 emit(hppa_shr(hi(rd), imm, hi(rd)), ctx);
486 break;
487 case BPF_ARSH:
488 if (imm == 0)
489 break;
490 if (imm > 32) {
491 imm -= 32;
492 emit(hppa_extrws(hi(rd), 31 - imm, imm, lo(rd)), ctx);
493 emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
494 } else if (imm == 32) {
495 emit_hppa_copy(hi(rd), lo(rd), ctx);
496 emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
497 } else {
498 emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
499 emit(hppa_extrws(hi(rd), 31 - imm, imm, hi(rd)), ctx);
501 break;
502 default:
503 WARN_ON(1);
506 bpf_put_reg64(dst, rd, ctx);
509 static void emit_alu_i32(const s8 *dst, s32 imm,
510 struct hppa_jit_context *ctx, const u8 op)
512 const s8 *tmp1 = regmap[TMP_REG_1];
513 const s8 *rd = bpf_get_reg32(dst, tmp1, ctx);
515 if (op == BPF_MOV)
516 rd = bpf_get_reg32_ref(dst, tmp1, ctx);
517 else
518 rd = bpf_get_reg32(dst, tmp1, ctx);
520 /* dst = dst OP imm */
521 switch (op) {
522 case BPF_MOV:
523 emit_imm(lo(rd), imm, ctx);
524 break;
525 case BPF_ADD:
526 emit_imm(HPPA_REG_T0, imm, ctx);
527 emit(hppa_add(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
528 break;
529 case BPF_SUB:
530 emit_imm(HPPA_REG_T0, imm, ctx);
531 emit(hppa_sub(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
532 break;
533 case BPF_AND:
534 emit_imm(HPPA_REG_T0, imm, ctx);
535 emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
536 break;
537 case BPF_OR:
538 emit_imm(HPPA_REG_T0, imm, ctx);
539 emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
540 break;
541 case BPF_XOR:
542 emit_imm(HPPA_REG_T0, imm, ctx);
543 emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
544 break;
545 case BPF_LSH:
546 if (imm != 0)
547 emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
548 break;
549 case BPF_RSH:
550 if (imm != 0)
551 emit(hppa_shr(lo(rd), imm, lo(rd)), ctx);
552 break;
553 case BPF_ARSH:
554 if (imm != 0)
555 emit(hppa_extrws(lo(rd), 31 - imm, imm, lo(rd)), ctx);
556 break;
557 default:
558 WARN_ON(1);
561 bpf_put_reg32(dst, rd, ctx);
564 static void emit_alu_r64(const s8 *dst, const s8 *src,
565 struct hppa_jit_context *ctx, const u8 op)
567 const s8 *tmp1 = regmap[TMP_REG_1];
568 const s8 *tmp2 = regmap[TMP_REG_2];
569 const s8 *rd;
570 const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
572 if (op == BPF_MOV)
573 rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
574 else
575 rd = bpf_get_reg64(dst, tmp1, ctx);
577 /* dst = dst OP src */
578 switch (op) {
579 case BPF_MOV:
580 emit_hppa_copy(lo(rs), lo(rd), ctx);
581 emit_hppa_copy(hi(rs), hi(rd), ctx);
582 break;
583 case BPF_ADD:
584 emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
585 emit(hppa_addc(hi(rd), hi(rs), hi(rd)), ctx);
586 break;
587 case BPF_SUB:
588 emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
589 emit(hppa_subb(hi(rd), hi(rs), hi(rd)), ctx);
590 break;
591 case BPF_AND:
592 emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
593 emit(hppa_and(hi(rd), hi(rs), hi(rd)), ctx);
594 break;
595 case BPF_OR:
596 emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
597 emit(hppa_or(hi(rd), hi(rs), hi(rd)), ctx);
598 break;
599 case BPF_XOR:
600 emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
601 emit_hppa_xor(hi(rd), hi(rs), hi(rd), ctx);
602 break;
603 case BPF_MUL:
604 emit_call_libgcc_ll(__muldi3, rd, rs, op, ctx);
605 break;
606 case BPF_DIV:
607 emit_call_libgcc_ll(&hppa_div64, rd, rs, op, ctx);
608 break;
609 case BPF_MOD:
610 emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, op, ctx);
611 break;
612 case BPF_LSH:
613 emit_call_libgcc_ll(__ashldi3, rd, rs, op, ctx);
614 break;
615 case BPF_RSH:
616 emit_call_libgcc_ll(__lshrdi3, rd, rs, op, ctx);
617 break;
618 case BPF_ARSH:
619 emit_call_libgcc_ll(__ashrdi3, rd, rs, op, ctx);
620 break;
621 case BPF_NEG:
622 emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);
623 emit(hppa_subb(HPPA_REG_ZERO, hi(rd), hi(rd)), ctx);
624 break;
625 default:
626 WARN_ON(1);
629 bpf_put_reg64(dst, rd, ctx);
632 static void emit_alu_r32(const s8 *dst, const s8 *src,
633 struct hppa_jit_context *ctx, const u8 op)
635 const s8 *tmp1 = regmap[TMP_REG_1];
636 const s8 *tmp2 = regmap[TMP_REG_2];
637 const s8 *rd;
638 const s8 *rs = bpf_get_reg32(src, tmp2, ctx);
640 if (op == BPF_MOV)
641 rd = bpf_get_reg32_ref(dst, tmp1, ctx);
642 else
643 rd = bpf_get_reg32(dst, tmp1, ctx);
645 /* dst = dst OP src */
646 switch (op) {
647 case BPF_MOV:
648 emit_hppa_copy(lo(rs), lo(rd), ctx);
649 break;
650 case BPF_ADD:
651 emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
652 break;
653 case BPF_SUB:
654 emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
655 break;
656 case BPF_AND:
657 emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
658 break;
659 case BPF_OR:
660 emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
661 break;
662 case BPF_XOR:
663 emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
664 break;
665 case BPF_MUL:
666 emit_call_millicode($$mulI, lo(rd), lo(rs), op, ctx);
667 break;
668 case BPF_DIV:
669 emit_call_millicode($$divU, lo(rd), lo(rs), op, ctx);
670 break;
671 case BPF_MOD:
672 emit_call_millicode($$remU, lo(rd), lo(rs), op, ctx);
673 break;
674 case BPF_LSH:
675 emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
676 emit(hppa_mtsar(HPPA_REG_T0), ctx);
677 emit(hppa_depwz_sar(lo(rd), lo(rd)), ctx);
678 break;
679 case BPF_RSH:
680 emit(hppa_mtsar(lo(rs)), ctx);
681 emit(hppa_shrpw_sar(lo(rd), lo(rd)), ctx);
682 break;
683 case BPF_ARSH: /* sign extending arithmetic shift right */
684 // emit(hppa_beq(lo(rs), HPPA_REG_ZERO, 2), ctx);
685 emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
686 emit(hppa_mtsar(HPPA_REG_T0), ctx);
687 emit(hppa_extrws_sar(lo(rd), lo(rd)), ctx);
688 break;
689 case BPF_NEG:
690 emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx); // sub r0,rd,rd
691 break;
692 default:
693 WARN_ON(1);
696 bpf_put_reg32(dst, rd, ctx);
699 static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 paoff,
700 struct hppa_jit_context *ctx, const u8 op)
702 int e, s = ctx->ninsns;
703 const s8 *tmp1 = regmap[TMP_REG_1];
704 const s8 *tmp2 = regmap[TMP_REG_2];
706 const s8 *rs1 = bpf_get_reg64(src1, tmp1, ctx);
707 const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx);
710 * NO_JUMP skips over the rest of the instructions and the
711 * emit_jump, meaning the BPF branch is not taken.
712 * JUMP skips directly to the emit_jump, meaning
713 * the BPF branch is taken.
715 * The fallthrough case results in the BPF branch being taken.
717 #define NO_JUMP(idx) (2 + (idx) - 1)
718 #define JUMP(idx) (0 + (idx) - 1)
720 switch (op) {
721 case BPF_JEQ:
722 emit(hppa_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
723 emit(hppa_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
724 break;
725 case BPF_JGT:
726 emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
727 emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
728 emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
729 break;
730 case BPF_JLT:
731 emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
732 emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
733 emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
734 break;
735 case BPF_JGE:
736 emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
737 emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
738 emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
739 break;
740 case BPF_JLE:
741 emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
742 emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
743 emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
744 break;
745 case BPF_JNE:
746 emit(hppa_bne(hi(rs1), hi(rs2), JUMP(1)), ctx);
747 emit(hppa_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
748 break;
749 case BPF_JSGT:
750 emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
751 emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
752 emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
753 break;
754 case BPF_JSLT:
755 emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
756 emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
757 emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
758 break;
759 case BPF_JSGE:
760 emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
761 emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
762 emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
763 break;
764 case BPF_JSLE:
765 emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
766 emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
767 emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
768 break;
769 case BPF_JSET:
770 emit(hppa_and(hi(rs1), hi(rs2), HPPA_REG_T0), ctx);
771 emit(hppa_and(lo(rs1), lo(rs2), HPPA_REG_T1), ctx);
772 emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, JUMP(1)), ctx);
773 emit(hppa_beq(HPPA_REG_T1, HPPA_REG_ZERO, NO_JUMP(0)), ctx);
774 break;
775 default:
776 WARN_ON(1);
779 #undef NO_JUMP
780 #undef JUMP
782 e = ctx->ninsns;
783 /* Adjust for extra insns. */
784 paoff -= (e - s);
785 emit_jump(paoff, true, ctx);
786 return 0;
789 static int emit_bcc(u8 op, u8 rd, u8 rs, int paoff, struct hppa_jit_context *ctx)
791 int e, s;
792 bool far = false;
793 int off;
795 if (op == BPF_JSET) {
797 * BPF_JSET is a special case: it has no inverse so we always
798 * treat it as a far branch.
800 emit(hppa_and(rd, rs, HPPA_REG_T0), ctx);
801 paoff -= 1; /* reduce offset due to hppa_and() above */
802 rd = HPPA_REG_T0;
803 rs = HPPA_REG_ZERO;
804 op = BPF_JNE;
807 s = ctx->ninsns;
809 if (!relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 12)) {
810 op = invert_bpf_cond(op);
811 far = true;
815 * For a far branch, the condition is negated and we jump over the
816 * branch itself, and the three instructions from emit_jump.
817 * For a near branch, just use paoff.
819 off = far ? (HPPA_BRANCH_DISPLACEMENT - 1) : paoff - HPPA_BRANCH_DISPLACEMENT;
821 switch (op) {
822 /* IF (dst COND src) JUMP off */
823 case BPF_JEQ:
824 emit(hppa_beq(rd, rs, off), ctx);
825 break;
826 case BPF_JGT:
827 emit(hppa_bgtu(rd, rs, off), ctx);
828 break;
829 case BPF_JLT:
830 emit(hppa_bltu(rd, rs, off), ctx);
831 break;
832 case BPF_JGE:
833 emit(hppa_bgeu(rd, rs, off), ctx);
834 break;
835 case BPF_JLE:
836 emit(hppa_bleu(rd, rs, off), ctx);
837 break;
838 case BPF_JNE:
839 emit(hppa_bne(rd, rs, off), ctx);
840 break;
841 case BPF_JSGT:
842 emit(hppa_bgt(rd, rs, off), ctx);
843 break;
844 case BPF_JSLT:
845 emit(hppa_blt(rd, rs, off), ctx);
846 break;
847 case BPF_JSGE:
848 emit(hppa_bge(rd, rs, off), ctx);
849 break;
850 case BPF_JSLE:
851 emit(hppa_ble(rd, rs, off), ctx);
852 break;
853 default:
854 WARN_ON(1);
857 if (far) {
858 e = ctx->ninsns;
859 /* Adjust for extra insns. */
860 paoff -= (e - s);
861 emit_jump(paoff, true, ctx);
863 return 0;
866 static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 paoff,
867 struct hppa_jit_context *ctx, const u8 op)
869 int e, s = ctx->ninsns;
870 const s8 *tmp1 = regmap[TMP_REG_1];
871 const s8 *tmp2 = regmap[TMP_REG_2];
873 const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx);
874 const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx);
876 e = ctx->ninsns;
877 /* Adjust for extra insns. */
878 paoff -= (e - s);
880 if (emit_bcc(op, lo(rs1), lo(rs2), paoff, ctx))
881 return -1;
883 return 0;
886 static void emit_call(bool fixed, u64 addr, struct hppa_jit_context *ctx)
888 const s8 *tmp = regmap[TMP_REG_1];
889 const s8 *r0 = regmap[BPF_REG_0];
890 const s8 *reg;
891 const int offset_sp = 2 * STACK_ALIGN;
893 /* prepare stack */
894 emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);
896 /* load R1 & R2 in registers, R3-R5 to stack. */
897 reg = bpf_get_reg64_offset(regmap[BPF_REG_5], tmp, offset_sp, ctx);
898 emit(hppa_stw(hi(reg), -0x48, HPPA_REG_SP), ctx);
899 emit(hppa_stw(lo(reg), -0x44, HPPA_REG_SP), ctx);
901 reg = bpf_get_reg64_offset(regmap[BPF_REG_4], tmp, offset_sp, ctx);
902 emit(hppa_stw(hi(reg), -0x40, HPPA_REG_SP), ctx);
903 emit(hppa_stw(lo(reg), -0x3c, HPPA_REG_SP), ctx);
905 reg = bpf_get_reg64_offset(regmap[BPF_REG_3], tmp, offset_sp, ctx);
906 emit(hppa_stw(hi(reg), -0x38, HPPA_REG_SP), ctx);
907 emit(hppa_stw(lo(reg), -0x34, HPPA_REG_SP), ctx);
909 reg = bpf_get_reg64_offset(regmap[BPF_REG_2], tmp, offset_sp, ctx);
910 emit_hppa_copy(hi(reg), HPPA_REG_ARG3, ctx);
911 emit_hppa_copy(lo(reg), HPPA_REG_ARG2, ctx);
913 reg = bpf_get_reg64_offset(regmap[BPF_REG_1], tmp, offset_sp, ctx);
914 emit_hppa_copy(hi(reg), HPPA_REG_ARG1, ctx);
915 emit_hppa_copy(lo(reg), HPPA_REG_ARG0, ctx);
917 /* backup TCC */
918 if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
919 emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx);
922 * Use ldil() to load absolute address. Don't use emit_imm as the
923 * number of emitted instructions should not depend on the value of
924 * addr.
926 emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
927 emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
928 /* set return address in delay slot */
929 emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);
931 /* restore TCC */
932 if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
933 emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx);
935 /* restore stack */
936 emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);
938 /* set return value. */
939 emit_hppa_copy(HPPA_REG_RET0, hi(r0), ctx);
940 emit_hppa_copy(HPPA_REG_RET1, lo(r0), ctx);
943 static int emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx)
946 * R1 -> &ctx
947 * R2 -> &array
948 * R3 -> index
950 int off;
951 const s8 *arr_reg = regmap[BPF_REG_2];
952 const s8 *idx_reg = regmap[BPF_REG_3];
953 struct bpf_array bpfa;
954 struct bpf_prog bpfp;
956 /* get address of TCC main exit function for error case into rp */
957 emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
959 /* max_entries = array->map.max_entries; */
960 off = offsetof(struct bpf_array, map.max_entries);
961 BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4);
962 emit(hppa_ldw(off, lo(arr_reg), HPPA_REG_T1), ctx);
965 * if (index >= max_entries)
966 * goto out;
968 emit(hppa_bltu(lo(idx_reg), HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
969 emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
972 * if (--tcc < 0)
973 * goto out;
975 REG_FORCE_SEEN(ctx, HPPA_REG_TCC);
976 emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx);
977 emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
978 emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
981 * prog = array->ptrs[index];
982 * if (!prog)
983 * goto out;
985 BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 4);
986 emit(hppa_sh2add(lo(idx_reg), lo(arr_reg), HPPA_REG_T0), ctx);
987 off = offsetof(struct bpf_array, ptrs);
988 BUILD_BUG_ON(!relative_bits_ok(off, 11));
989 emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
990 emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
991 emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
994 * tcc = temp_tcc;
995 * goto *(prog->bpf_func + 4);
997 off = offsetof(struct bpf_prog, bpf_func);
998 BUILD_BUG_ON(!relative_bits_ok(off, 11));
999 BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 4);
1000 emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
1001 /* Epilogue jumps to *(t0 + 4). */
1002 __build_epilogue(true, ctx);
1003 return 0;
1006 static int emit_load_r64(const s8 *dst, const s8 *src, s16 off,
1007 struct hppa_jit_context *ctx, const u8 size)
1009 const s8 *tmp1 = regmap[TMP_REG_1];
1010 const s8 *tmp2 = regmap[TMP_REG_2];
1011 const s8 *rd = bpf_get_reg64_ref(dst, tmp1, ctx->prog->aux->verifier_zext, ctx);
1012 const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
1013 s8 srcreg;
1015 /* need to calculate address since offset does not fit in 14 bits? */
1016 if (relative_bits_ok(off, 14))
1017 srcreg = lo(rs);
1018 else {
1019 /* need to use R1 here, since addil puts result into R1 */
1020 srcreg = HPPA_REG_R1;
1021 emit(hppa_addil(off, lo(rs)), ctx);
1022 off = im11(off);
1025 /* LDX: dst = *(size *)(src + off) */
1026 switch (size) {
1027 case BPF_B:
1028 emit(hppa_ldb(off + 0, srcreg, lo(rd)), ctx);
1029 if (!ctx->prog->aux->verifier_zext)
1030 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
1031 break;
1032 case BPF_H:
1033 emit(hppa_ldh(off + 0, srcreg, lo(rd)), ctx);
1034 if (!ctx->prog->aux->verifier_zext)
1035 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
1036 break;
1037 case BPF_W:
1038 emit(hppa_ldw(off + 0, srcreg, lo(rd)), ctx);
1039 if (!ctx->prog->aux->verifier_zext)
1040 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
1041 break;
1042 case BPF_DW:
1043 emit(hppa_ldw(off + 0, srcreg, hi(rd)), ctx);
1044 emit(hppa_ldw(off + 4, srcreg, lo(rd)), ctx);
1045 break;
1048 bpf_put_reg64(dst, rd, ctx);
1049 return 0;
1052 static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
1053 struct hppa_jit_context *ctx, const u8 size,
1054 const u8 mode)
1056 const s8 *tmp1 = regmap[TMP_REG_1];
1057 const s8 *tmp2 = regmap[TMP_REG_2];
1058 const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
1059 const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
1060 s8 dstreg;
1062 /* need to calculate address since offset does not fit in 14 bits? */
1063 if (relative_bits_ok(off, 14))
1064 dstreg = lo(rd);
1065 else {
1066 /* need to use R1 here, since addil puts result into R1 */
1067 dstreg = HPPA_REG_R1;
1068 emit(hppa_addil(off, lo(rd)), ctx);
1069 off = im11(off);
1072 /* ST: *(size *)(dst + off) = imm */
1073 switch (size) {
1074 case BPF_B:
1075 emit(hppa_stb(lo(rs), off + 0, dstreg), ctx);
1076 break;
1077 case BPF_H:
1078 emit(hppa_sth(lo(rs), off + 0, dstreg), ctx);
1079 break;
1080 case BPF_W:
1081 emit(hppa_stw(lo(rs), off + 0, dstreg), ctx);
1082 break;
1083 case BPF_DW:
1084 emit(hppa_stw(hi(rs), off + 0, dstreg), ctx);
1085 emit(hppa_stw(lo(rs), off + 4, dstreg), ctx);
1086 break;
1089 return 0;
1092 static void emit_rev16(const s8 rd, struct hppa_jit_context *ctx)
1094 emit(hppa_extru(rd, 23, 8, HPPA_REG_T1), ctx);
1095 emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx);
1096 emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx);
1099 static void emit_rev32(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
1101 emit(hppa_shrpw(rs, rs, 16, HPPA_REG_T1), ctx);
1102 emit(hppa_depwz(HPPA_REG_T1, 15, 8, HPPA_REG_T1), ctx);
1103 emit(hppa_shrpw(rs, HPPA_REG_T1, 8, rd), ctx);
1106 static void emit_zext64(const s8 *dst, struct hppa_jit_context *ctx)
1108 const s8 *rd;
1109 const s8 *tmp1 = regmap[TMP_REG_1];
1111 rd = bpf_get_reg64(dst, tmp1, ctx);
1112 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
1113 bpf_put_reg64(dst, rd, ctx);
1116 int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx,
1117 bool extra_pass)
1119 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
1120 BPF_CLASS(insn->code) == BPF_JMP;
1121 int s, e, paoff, i = insn - ctx->prog->insnsi;
1122 u8 code = insn->code;
1123 s16 off = insn->off;
1124 s32 imm = insn->imm;
1126 const s8 *dst = regmap[insn->dst_reg];
1127 const s8 *src = regmap[insn->src_reg];
1128 const s8 *tmp1 = regmap[TMP_REG_1];
1129 const s8 *tmp2 = regmap[TMP_REG_2];
1131 if (0) printk("CLASS %03d CODE %#02x ALU64:%d BPF_SIZE %#02x "
1132 "BPF_CODE %#02x src_reg %d dst_reg %d\n",
1133 BPF_CLASS(code), code, (code & BPF_ALU64) ? 1:0, BPF_SIZE(code),
1134 BPF_OP(code), insn->src_reg, insn->dst_reg);
1136 switch (code) {
1137 /* dst = src */
1138 case BPF_ALU64 | BPF_MOV | BPF_X:
1140 case BPF_ALU64 | BPF_ADD | BPF_X:
1141 case BPF_ALU64 | BPF_ADD | BPF_K:
1143 case BPF_ALU64 | BPF_SUB | BPF_X:
1144 case BPF_ALU64 | BPF_SUB | BPF_K:
1146 case BPF_ALU64 | BPF_AND | BPF_X:
1147 case BPF_ALU64 | BPF_OR | BPF_X:
1148 case BPF_ALU64 | BPF_XOR | BPF_X:
1150 case BPF_ALU64 | BPF_MUL | BPF_X:
1151 case BPF_ALU64 | BPF_MUL | BPF_K:
1153 case BPF_ALU64 | BPF_DIV | BPF_X:
1154 case BPF_ALU64 | BPF_DIV | BPF_K:
1156 case BPF_ALU64 | BPF_MOD | BPF_X:
1157 case BPF_ALU64 | BPF_MOD | BPF_K:
1159 case BPF_ALU64 | BPF_LSH | BPF_X:
1160 case BPF_ALU64 | BPF_RSH | BPF_X:
1161 case BPF_ALU64 | BPF_ARSH | BPF_X:
1162 if (BPF_SRC(code) == BPF_K) {
1163 emit_imm32(tmp2, imm, ctx);
1164 src = tmp2;
1166 emit_alu_r64(dst, src, ctx, BPF_OP(code));
1167 break;
1169 /* dst = -dst */
1170 case BPF_ALU64 | BPF_NEG:
1171 emit_alu_r64(dst, tmp2, ctx, BPF_OP(code));
1172 break;
1174 case BPF_ALU64 | BPF_MOV | BPF_K:
1175 case BPF_ALU64 | BPF_AND | BPF_K:
1176 case BPF_ALU64 | BPF_OR | BPF_K:
1177 case BPF_ALU64 | BPF_XOR | BPF_K:
1178 case BPF_ALU64 | BPF_LSH | BPF_K:
1179 case BPF_ALU64 | BPF_RSH | BPF_K:
1180 case BPF_ALU64 | BPF_ARSH | BPF_K:
1181 emit_alu_i64(dst, imm, ctx, BPF_OP(code));
1182 break;
1184 case BPF_ALU | BPF_MOV | BPF_X:
1185 if (imm == 1) {
1186 /* Special mov32 for zext. */
1187 emit_zext64(dst, ctx);
1188 break;
1190 fallthrough;
1191 /* dst = dst OP src */
1192 case BPF_ALU | BPF_ADD | BPF_X:
1193 case BPF_ALU | BPF_SUB | BPF_X:
1194 case BPF_ALU | BPF_AND | BPF_X:
1195 case BPF_ALU | BPF_OR | BPF_X:
1196 case BPF_ALU | BPF_XOR | BPF_X:
1198 case BPF_ALU | BPF_MUL | BPF_X:
1199 case BPF_ALU | BPF_MUL | BPF_K:
1201 case BPF_ALU | BPF_DIV | BPF_X:
1202 case BPF_ALU | BPF_DIV | BPF_K:
1204 case BPF_ALU | BPF_MOD | BPF_X:
1205 case BPF_ALU | BPF_MOD | BPF_K:
1207 case BPF_ALU | BPF_LSH | BPF_X:
1208 case BPF_ALU | BPF_RSH | BPF_X:
1209 case BPF_ALU | BPF_ARSH | BPF_X:
1210 if (BPF_SRC(code) == BPF_K) {
1211 emit_imm32(tmp2, imm, ctx);
1212 src = tmp2;
1214 emit_alu_r32(dst, src, ctx, BPF_OP(code));
1215 break;
1217 /* dst = dst OP imm */
1218 case BPF_ALU | BPF_MOV | BPF_K:
1219 case BPF_ALU | BPF_ADD | BPF_K:
1220 case BPF_ALU | BPF_SUB | BPF_K:
1221 case BPF_ALU | BPF_AND | BPF_K:
1222 case BPF_ALU | BPF_OR | BPF_K:
1223 case BPF_ALU | BPF_XOR | BPF_K:
1224 case BPF_ALU | BPF_LSH | BPF_K:
1225 case BPF_ALU | BPF_RSH | BPF_K:
1226 case BPF_ALU | BPF_ARSH | BPF_K:
1228 * mul,div,mod are handled in the BPF_X case.
1230 emit_alu_i32(dst, imm, ctx, BPF_OP(code));
1231 break;
1233 /* dst = -dst */
1234 case BPF_ALU | BPF_NEG:
1236 * src is ignored---choose tmp2 as a dummy register since it
1237 * is not on the stack.
1239 emit_alu_r32(dst, tmp2, ctx, BPF_OP(code));
1240 break;
1242 /* dst = BSWAP##imm(dst) */
1243 case BPF_ALU | BPF_END | BPF_FROM_BE:
1245 const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
1247 switch (imm) {
1248 case 16:
1249 /* zero-extend 16 bits into 64 bits */
1250 emit(hppa_extru(lo(rd), 31, 16, lo(rd)), ctx);
1251 fallthrough;
1252 case 32:
1253 /* zero-extend 32 bits into 64 bits */
1254 if (!ctx->prog->aux->verifier_zext)
1255 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
1256 break;
1257 case 64:
1258 /* Do nothing. */
1259 break;
1260 default:
1261 pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
1262 return -1;
1265 bpf_put_reg64(dst, rd, ctx);
1266 break;
1269 case BPF_ALU | BPF_END | BPF_FROM_LE:
1271 const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
1273 switch (imm) {
1274 case 16:
1275 emit_rev16(lo(rd), ctx);
1276 if (!ctx->prog->aux->verifier_zext)
1277 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
1278 break;
1279 case 32:
1280 emit_rev32(lo(rd), lo(rd), ctx);
1281 if (!ctx->prog->aux->verifier_zext)
1282 emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
1283 break;
1284 case 64:
1285 /* Swap upper and lower halves, then each half. */
1286 emit_hppa_copy(hi(rd), HPPA_REG_T0, ctx);
1287 emit_rev32(lo(rd), hi(rd), ctx);
1288 emit_rev32(HPPA_REG_T0, lo(rd), ctx);
1289 break;
1290 default:
1291 pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
1292 return -1;
1295 bpf_put_reg64(dst, rd, ctx);
1296 break;
1298 /* JUMP off */
1299 case BPF_JMP | BPF_JA:
1300 paoff = hppa_offset(i, off, ctx);
1301 emit_jump(paoff, false, ctx);
1302 break;
1303 /* function call */
1304 case BPF_JMP | BPF_CALL:
1306 bool fixed;
1307 int ret;
1308 u64 addr;
1310 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
1311 &fixed);
1312 if (ret < 0)
1313 return ret;
1314 emit_call(fixed, addr, ctx);
1315 break;
1317 /* tail call */
1318 case BPF_JMP | BPF_TAIL_CALL:
1319 REG_SET_SEEN_ALL(ctx);
1320 if (emit_bpf_tail_call(i, ctx))
1321 return -1;
1322 break;
1323 /* IF (dst COND imm) JUMP off */
1324 case BPF_JMP | BPF_JEQ | BPF_X:
1325 case BPF_JMP | BPF_JEQ | BPF_K:
1326 case BPF_JMP32 | BPF_JEQ | BPF_X:
1327 case BPF_JMP32 | BPF_JEQ | BPF_K:
1329 case BPF_JMP | BPF_JNE | BPF_X:
1330 case BPF_JMP | BPF_JNE | BPF_K:
1331 case BPF_JMP32 | BPF_JNE | BPF_X:
1332 case BPF_JMP32 | BPF_JNE | BPF_K:
1334 case BPF_JMP | BPF_JLE | BPF_X:
1335 case BPF_JMP | BPF_JLE | BPF_K:
1336 case BPF_JMP32 | BPF_JLE | BPF_X:
1337 case BPF_JMP32 | BPF_JLE | BPF_K:
1339 case BPF_JMP | BPF_JLT | BPF_X:
1340 case BPF_JMP | BPF_JLT | BPF_K:
1341 case BPF_JMP32 | BPF_JLT | BPF_X:
1342 case BPF_JMP32 | BPF_JLT | BPF_K:
1344 case BPF_JMP | BPF_JGE | BPF_X:
1345 case BPF_JMP | BPF_JGE | BPF_K:
1346 case BPF_JMP32 | BPF_JGE | BPF_X:
1347 case BPF_JMP32 | BPF_JGE | BPF_K:
1349 case BPF_JMP | BPF_JGT | BPF_X:
1350 case BPF_JMP | BPF_JGT | BPF_K:
1351 case BPF_JMP32 | BPF_JGT | BPF_X:
1352 case BPF_JMP32 | BPF_JGT | BPF_K:
1354 case BPF_JMP | BPF_JSLE | BPF_X:
1355 case BPF_JMP | BPF_JSLE | BPF_K:
1356 case BPF_JMP32 | BPF_JSLE | BPF_X:
1357 case BPF_JMP32 | BPF_JSLE | BPF_K:
1359 case BPF_JMP | BPF_JSLT | BPF_X:
1360 case BPF_JMP | BPF_JSLT | BPF_K:
1361 case BPF_JMP32 | BPF_JSLT | BPF_X:
1362 case BPF_JMP32 | BPF_JSLT | BPF_K:
1364 case BPF_JMP | BPF_JSGE | BPF_X:
1365 case BPF_JMP | BPF_JSGE | BPF_K:
1366 case BPF_JMP32 | BPF_JSGE | BPF_X:
1367 case BPF_JMP32 | BPF_JSGE | BPF_K:
1369 case BPF_JMP | BPF_JSGT | BPF_X:
1370 case BPF_JMP | BPF_JSGT | BPF_K:
1371 case BPF_JMP32 | BPF_JSGT | BPF_X:
1372 case BPF_JMP32 | BPF_JSGT | BPF_K:
1374 case BPF_JMP | BPF_JSET | BPF_X:
1375 case BPF_JMP | BPF_JSET | BPF_K:
1376 case BPF_JMP32 | BPF_JSET | BPF_X:
1377 case BPF_JMP32 | BPF_JSET | BPF_K:
1378 paoff = hppa_offset(i, off, ctx);
1379 if (BPF_SRC(code) == BPF_K) {
1380 s = ctx->ninsns;
1381 emit_imm32(tmp2, imm, ctx);
1382 src = tmp2;
1383 e = ctx->ninsns;
1384 paoff -= (e - s);
1386 if (is64)
1387 emit_branch_r64(dst, src, paoff, ctx, BPF_OP(code));
1388 else
1389 emit_branch_r32(dst, src, paoff, ctx, BPF_OP(code));
1390 break;
1391 /* function return */
1392 case BPF_JMP | BPF_EXIT:
1393 if (i == ctx->prog->len - 1)
1394 break;
1395 /* load epilogue function pointer and jump to it. */
1396 emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
1397 emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
1398 break;
1400 /* dst = imm64 */
1401 case BPF_LD | BPF_IMM | BPF_DW:
1403 struct bpf_insn insn1 = insn[1];
1404 u32 upper = insn1.imm;
1405 u32 lower = imm;
1406 const s8 *rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
1408 if (0 && bpf_pseudo_func(insn)) {
1409 WARN_ON(upper); /* we are 32-bit! */
1410 upper = 0;
1411 lower = (uintptr_t) dereference_function_descriptor(lower);
1414 emit_imm64(rd, upper, lower, ctx);
1415 bpf_put_reg64(dst, rd, ctx);
1416 return 1;
1419 /* LDX: dst = *(size *)(src + off) */
1420 case BPF_LDX | BPF_MEM | BPF_B:
1421 case BPF_LDX | BPF_MEM | BPF_H:
1422 case BPF_LDX | BPF_MEM | BPF_W:
1423 case BPF_LDX | BPF_MEM | BPF_DW:
1424 if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code)))
1425 return -1;
1426 break;
1428 /* speculation barrier */
1429 case BPF_ST | BPF_NOSPEC:
1430 break;
1432 /* ST: *(size *)(dst + off) = imm */
1433 case BPF_ST | BPF_MEM | BPF_B:
1434 case BPF_ST | BPF_MEM | BPF_H:
1435 case BPF_ST | BPF_MEM | BPF_W:
1436 case BPF_ST | BPF_MEM | BPF_DW:
1438 case BPF_STX | BPF_MEM | BPF_B:
1439 case BPF_STX | BPF_MEM | BPF_H:
1440 case BPF_STX | BPF_MEM | BPF_W:
1441 case BPF_STX | BPF_MEM | BPF_DW:
1442 if (BPF_CLASS(code) == BPF_ST) {
1443 emit_imm32(tmp2, imm, ctx);
1444 src = tmp2;
1447 if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code),
1448 BPF_MODE(code)))
1449 return -1;
1450 break;
1452 case BPF_STX | BPF_ATOMIC | BPF_W:
1453 case BPF_STX | BPF_ATOMIC | BPF_DW:
1454 pr_info_once(
1455 "bpf-jit: not supported: atomic operation %02x ***\n",
1456 insn->imm);
1457 return -EFAULT;
1459 default:
1460 pr_err("bpf-jit: unknown opcode %02x\n", code);
1461 return -EINVAL;
1464 return 0;
1467 void bpf_jit_build_prologue(struct hppa_jit_context *ctx)
1469 const s8 *tmp = regmap[TMP_REG_1];
1470 const s8 *dst, *reg;
1471 int stack_adjust = 0;
1472 int i;
1473 unsigned long addr;
1474 int bpf_stack_adjust;
1477 * stack on hppa grows up, so if tail calls are used we need to
1478 * allocate the maximum stack size
1480 if (REG_ALL_SEEN(ctx))
1481 bpf_stack_adjust = MAX_BPF_STACK;
1482 else
1483 bpf_stack_adjust = ctx->prog->aux->stack_depth;
1484 bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN);
1486 /* make space for callee-saved registers. */
1487 stack_adjust += NR_SAVED_REGISTERS * REG_SIZE;
1488 /* make space for BPF registers on stack. */
1489 stack_adjust += BPF_JIT_SCRATCH_REGS * REG_SIZE;
1490 /* make space for BPF stack. */
1491 stack_adjust += bpf_stack_adjust;
1492 /* round up for stack alignment. */
1493 stack_adjust = round_up(stack_adjust, STACK_ALIGN);
1496 * The first instruction sets the tail-call-counter (TCC) register.
1497 * This instruction is skipped by tail calls.
1498 * Use a temporary register instead of a caller-saved register initially.
1500 emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx);
1503 * skip all initializations when called as BPF TAIL call.
1505 emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx);
1506 emit(hppa_bne(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, ctx->prologue_len - 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
1508 /* set up hppa stack frame. */
1509 emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx); // copy sp,r1 (=prev_sp)
1510 emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx); // ldo stack_adjust(sp),sp (increase stack)
1511 emit(hppa_stw(HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx); // stw prev_sp,-0x04(sp)
1512 emit(hppa_stw(HPPA_REG_RP, -0x14, HPPA_REG_SP), ctx); // stw rp,-0x14(sp)
1514 REG_FORCE_SEEN(ctx, HPPA_REG_T0);
1515 REG_FORCE_SEEN(ctx, HPPA_REG_T1);
1516 REG_FORCE_SEEN(ctx, HPPA_REG_T2);
1517 REG_FORCE_SEEN(ctx, HPPA_REG_T3);
1518 REG_FORCE_SEEN(ctx, HPPA_REG_T4);
1519 REG_FORCE_SEEN(ctx, HPPA_REG_T5);
1521 /* save callee-save registers. */
1522 for (i = 3; i <= 18; i++) {
1523 if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
1524 continue;
1525 emit(hppa_stw(HPPA_R(i), -REG_SIZE * (8 + (i-3)), HPPA_REG_SP), ctx); // stw ri,-save_area(sp)
1529 * now really set the tail call counter (TCC) register.
1531 if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
1532 emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx);
1535 * save epilogue function pointer for outer TCC call chain.
1536 * The main TCC call stores the final RP on stack.
1538 addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset];
1539 /* skip first two instructions of exit function, which jump to exit */
1540 addr += 2 * HPPA_INSN_SIZE;
1541 emit(hppa_ldil(addr, HPPA_REG_T2), ctx);
1542 emit(hppa_ldo(im11(addr), HPPA_REG_T2, HPPA_REG_T2), ctx);
1543 emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx);
1545 /* load R1 & R2 from registers, R3-R5 from stack. */
1546 /* use HPPA_REG_R1 which holds the old stack value */
1547 dst = regmap[BPF_REG_5];
1548 reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1549 if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1550 if (REG_WAS_SEEN(ctx, hi(reg)))
1551 emit(hppa_ldw(-0x48, HPPA_REG_R1, hi(reg)), ctx);
1552 if (REG_WAS_SEEN(ctx, lo(reg)))
1553 emit(hppa_ldw(-0x44, HPPA_REG_R1, lo(reg)), ctx);
1554 bpf_put_reg64(dst, tmp, ctx);
1557 dst = regmap[BPF_REG_4];
1558 reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1559 if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1560 if (REG_WAS_SEEN(ctx, hi(reg)))
1561 emit(hppa_ldw(-0x40, HPPA_REG_R1, hi(reg)), ctx);
1562 if (REG_WAS_SEEN(ctx, lo(reg)))
1563 emit(hppa_ldw(-0x3c, HPPA_REG_R1, lo(reg)), ctx);
1564 bpf_put_reg64(dst, tmp, ctx);
1567 dst = regmap[BPF_REG_3];
1568 reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1569 if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1570 if (REG_WAS_SEEN(ctx, hi(reg)))
1571 emit(hppa_ldw(-0x38, HPPA_REG_R1, hi(reg)), ctx);
1572 if (REG_WAS_SEEN(ctx, lo(reg)))
1573 emit(hppa_ldw(-0x34, HPPA_REG_R1, lo(reg)), ctx);
1574 bpf_put_reg64(dst, tmp, ctx);
1577 dst = regmap[BPF_REG_2];
1578 reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1579 if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1580 if (REG_WAS_SEEN(ctx, hi(reg)))
1581 emit_hppa_copy(HPPA_REG_ARG3, hi(reg), ctx);
1582 if (REG_WAS_SEEN(ctx, lo(reg)))
1583 emit_hppa_copy(HPPA_REG_ARG2, lo(reg), ctx);
1584 bpf_put_reg64(dst, tmp, ctx);
1587 dst = regmap[BPF_REG_1];
1588 reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1589 if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1590 if (REG_WAS_SEEN(ctx, hi(reg)))
1591 emit_hppa_copy(HPPA_REG_ARG1, hi(reg), ctx);
1592 if (REG_WAS_SEEN(ctx, lo(reg)))
1593 emit_hppa_copy(HPPA_REG_ARG0, lo(reg), ctx);
1594 bpf_put_reg64(dst, tmp, ctx);
1597 /* Set up BPF frame pointer. */
1598 dst = regmap[BPF_REG_FP];
1599 reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
1600 if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
1601 if (REG_WAS_SEEN(ctx, lo(reg)))
1602 emit(hppa_ldo(-REG_SIZE * (NR_SAVED_REGISTERS + BPF_JIT_SCRATCH_REGS),
1603 HPPA_REG_SP, lo(reg)), ctx);
1604 if (REG_WAS_SEEN(ctx, hi(reg)))
1605 emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
1606 bpf_put_reg64(dst, tmp, ctx);
1609 emit(hppa_nop(), ctx);
1612 void bpf_jit_build_epilogue(struct hppa_jit_context *ctx)
1614 __build_epilogue(false, ctx);