xHCI: refine td allocation
[zen-stable.git] / arch / powerpc / net / bpf_jit_64.S
blobff4506e85cce80bc2fdb7003078cec3f5c0d9962
1 /* bpf_jit.S: Packet/header access helper functions
2  * for PPC64 BPF compiler.
3  *
4  * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; version 2
9  * of the License.
10  */
12 #include <asm/ppc_asm.h>
13 #include "bpf_jit.h"
15 /*
16  * All of these routines are called directly from generated code,
17  * whose register usage is:
18  *
19  * r3           skb
20  * r4,r5        A,X
21  * r6           *** address parameter to helper ***
22  * r7-r10       scratch
23  * r14          skb->data
24  * r15          skb headlen
25  * r16-31       M[]
26  */
28 /*
29  * To consider: These helpers are so small it could be better to just
30  * generate them inline.  Inline code can do the simple headlen check
31  * then branch directly to slow_path_XXX if required.  (In fact, could
32  * load a spare GPR with the address of slow_path_generic and pass size
33  * as an argument, making the call site a mtlr, li and bllr.)
34  *
35  * Technically, the "is addr < 0" check is unnecessary & slowing down
36  * the ABS path, as it's statically checked on generation.
37  */
/*
 * sk_load_word: fast-path load of a 32-bit word at offset r_addr.
 *
 * In:   r_addr = offset to read.  r_HL and r_D are used as the header
 *       length and the data base pointer.  The r_* register aliases are
 *       defined outside this file (presumably in bpf_jit.h - verify).
 * Out:  r_A = word loaded from r_D + r_addr (lwzx zero-extends),
 *       cr0 = eq or gt.
 *
 * A negative r_addr branches to bpf_error.  An offset greater than
 * r_HL - 4 branches to bpf_slow_path_word.  Both compares are signed
 * 64-bit compares.  Clobbers r_scratch1 and cr0.
 */
38         .globl  sk_load_word
39 sk_load_word:
40         cmpdi   r_addr, 0
41         blt     bpf_error
42         /* Are we accessing past headlen? */
43         subi    r_scratch1, r_HL, 4     /* highest offset at which 4 bytes still fit */
44         cmpd    r_scratch1, r_addr
45         blt     bpf_slow_path_word
46         /* Nope, just hitting the header.  cr0 here is eq or gt! */
47         lwzx    r_A, r_D, r_addr
48         /* When big endian we don't need to byteswap. */
49         blr     /* Return success, cr0 != LT */
/*
 * sk_load_half: fast-path load of a 16-bit halfword at offset r_addr.
 *
 * In:   r_addr = offset to read.  r_HL and r_D are used as the header
 *       length and the data base pointer.
 * Out:  r_A = halfword loaded from r_D + r_addr (lhzx zero-extends),
 *       cr0 = eq or gt.
 *
 * A negative r_addr branches to bpf_error.  An offset greater than
 * r_HL - 2 branches to bpf_slow_path_half.  Clobbers r_scratch1 and cr0.
 */
51         .globl  sk_load_half
52 sk_load_half:
53         cmpdi   r_addr, 0
54         blt     bpf_error
55         subi    r_scratch1, r_HL, 2     /* highest offset at which 2 bytes still fit */
56         cmpd    r_scratch1, r_addr
57         blt     bpf_slow_path_half
58         lhzx    r_A, r_D, r_addr
59         blr     /* cr0 is eq or gt here, i.e. not LT */
/*
 * sk_load_byte: fast-path load of a single byte at offset r_addr.
 *
 * In:   r_addr = offset to read.  r_HL and r_D are used as the header
 *       length and the data base pointer.
 * Out:  r_A = byte loaded from r_D + r_addr (lbzx zero-extends),
 *       cr0 = gt.
 *
 * A negative r_addr branches to bpf_error.  When r_HL <= r_addr the
 * access is handed to bpf_slow_path_byte.  Clobbers cr0.
 */
62         .globl  sk_load_byte
63 sk_load_byte:
64         cmpdi   r_addr, 0
65         blt     bpf_error
66         cmpd    r_HL, r_addr
67         ble     bpf_slow_path_byte      /* offset is not below r_HL */
68         lbzx    r_A, r_D, r_addr
69         blr     /* cr0 is gt here, i.e. not LT */
70 /*
71  * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
72  * r_addr is the offset value, already known positive
73  */
/*
 * sk_load_byte_msh: fast-path byte load that produces 4 * (byte & 0xf)
 * in r_X.
 *
 * In:   r_addr = offset to read.  There is no negative-offset check
 *       here; the caller is expected to pass a non-negative offset.
 * Out:  r_X = (byte at r_D + r_addr, low four bits) * 4, cr0 = gt.
 *
 * When r_HL <= r_addr the access is handed to bpf_slow_path_byte_msh.
 * Clobbers cr0.
 */
74         .globl sk_load_byte_msh
75 sk_load_byte_msh:
76         cmpd    r_HL, r_addr
77         ble     bpf_slow_path_byte_msh
78         lbzx    r_X, r_D, r_addr
79         rlwinm  r_X, r_X, 2, 32-4-2, 31-2       /* rotate left 2, keep bits 26..29: (x & 0xf) << 2 */
80         blr
/*
 * bpf_error: common failure exit for the load helpers.
 *
 * Sets r3 to 0 and returns to the generated code.  li does not modify
 * cr0, so the LT condition that brought us here is still set when the
 * caller tests it.
 */
82 bpf_error:
83         /* Entered with cr0 = lt */
84         li      r3, 0
85         /* Generated code will 'blt epilogue', returning 0. */
86         blr
88 /* Call out to skb_copy_bits:
89  * We'll need to back up our volatile regs first; we have
90  * local variable space at r1+(BPF_PPC_STACK_BASIC).
91  * Allocate a new stack frame here to remain ABI-compliant in
92  * stashing LR.
93  */
/*
 * bpf_slow_path_common(SIZE): shared body of the slow-path loaders.
 *
 * Sequence:
 *   1. Save LR at 16(r1), r_skb at (BPF_PPC_STACKFRAME+48)(r1), and
 *      r_A / r_X in slots 0 and 1 of the BPF_PPC_STACK_BASIC area.
 *      Per the register-usage comment at the top of the file, A and X
 *      live in r4/r5, which are overwritten below as call arguments.
 *   2. Point r5 at slot 2 of the BPF_PPC_STACK_BASIC area; this is the
 *      destination buffer handed to skb_copy_bits.
 *   3. Push a BPF_PPC_SLOWPATH_FRAME-sized frame and call
 *      skb_copy_bits with r3 = skb, r4 = r_addr, r5 = buffer, r6 = SIZE.
 *   4. Pop the frame, restore LR, r_A and r_X, then test the return
 *      value: a negative r3 branches to bpf_error with cr0 = LT.
 *   5. On success reload r_skb and fall through to the code that
 *      follows the macro, with cr0 != LT and the copied bytes at
 *      BPF_PPC_STACK_BASIC+(2*8)(r1).
 *
 * Clobbers r0, r6 and cr0, plus whatever skb_copy_bits itself clobbers.
 *
 * NOTE(review): there is no nop after the bl.  If skb_copy_bits could
 * ever be reached through a linker-generated stub, the 64-bit ELF ABI
 * expects a nop slot there for the TOC restore - confirm whether one
 * is needed for this build.
 */
94 #define bpf_slow_path_common(SIZE)                              \
95         mflr    r0;                                             \
96         std     r0, 16(r1);                                     \
97         /* R3 goes in parameter space of caller's frame */      \
98         std     r_skb, (BPF_PPC_STACKFRAME+48)(r1);             \
99         std     r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);           \
100         std     r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);           \
101         addi    r5, r1, BPF_PPC_STACK_BASIC+(2*8);              \
102         stdu    r1, -BPF_PPC_SLOWPATH_FRAME(r1);                \
103         /* R3 = r_skb, as passed */                             \
104         mr      r4, r_addr;                                     \
105         li      r6, SIZE;                                       \
106         bl      skb_copy_bits;                                  \
107         /* R3 = 0 on success */                                 \
108         addi    r1, r1, BPF_PPC_SLOWPATH_FRAME;                 \
109         ld      r0, 16(r1);                                     \
110         ld      r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);           \
111         ld      r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);           \
112         mtlr    r0;                                             \
113         cmpdi   r3, 0;                                          \
114         blt     bpf_error;      /* cr0 = LT */                  \
115         ld      r_skb, (BPF_PPC_STACKFRAME+48)(r1);             \
116         /* Great success! */
/*
 * bpf_slow_path_word: slow path for sk_load_word.
 *
 * Copies 4 bytes at offset r_addr into the stack buffer via
 * bpf_slow_path_common, then loads them into r_A as a zero-extended
 * word.  A failed copy leaves through bpf_error inside the macro.
 */
118 bpf_slow_path_word:
119         bpf_slow_path_common(4)
120         /* Data value is on stack, and cr0 != LT */
121         lwz     r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
122         blr
/*
 * bpf_slow_path_half: slow path for sk_load_half.
 *
 * Copies 2 bytes at offset r_addr into the stack buffer via
 * bpf_slow_path_common, then loads them into r_A as a zero-extended
 * halfword.  Returns with cr0 != LT; a failed copy leaves through
 * bpf_error inside the macro.
 */
124 bpf_slow_path_half:
125         bpf_slow_path_common(2)
126         lhz     r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
127         blr
/*
 * bpf_slow_path_byte: slow path for sk_load_byte.
 *
 * Copies 1 byte at offset r_addr into the stack buffer via
 * bpf_slow_path_common, then loads it into r_A zero-extended.
 * Returns with cr0 != LT; a failed copy leaves through bpf_error
 * inside the macro.
 */
129 bpf_slow_path_byte:
130         bpf_slow_path_common(1)
131         lbz     r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
132         blr
/*
 * bpf_slow_path_byte_msh: slow path for sk_load_byte_msh.
 *
 * Copies 1 byte at offset r_addr into the stack buffer via
 * bpf_slow_path_common, loads it into r_X and reduces it to
 * 4 * (byte & 0xf).  rlwinm without a record bit leaves cr0 alone, so
 * the caller still sees cr0 != LT.  A failed copy leaves through
 * bpf_error inside the macro.
 */
134 bpf_slow_path_byte_msh:
135         bpf_slow_path_common(1)
136         lbz     r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
137         rlwinm  r_X, r_X, 2, 32-4-2, 31-2       /* rotate left 2, keep bits 26..29: (x & 0xf) << 2 */
138         blr