/* LWIP service - bpf_filter.c - Berkeley Packet Filter core implementation */
/*
 * This is basically a drop-in replacement of NetBSD's bpf_filter.c, which
 * itself can be compiled for either the NetBSD kernel or for userland. On
 * MINIX 3, we would like to perform certain checks that NetBSD implements only
 * for its kernel (e.g., memory store access validation) while replacing the
 * NetBSD kernel specifics with our own (pbuf instead of mbuf, no BPF contexts
 * for now, etc.). As a result, it is easier to reimplement the whole thing,
 * because there is not all that much to it.
 *
 * Support for the standard BSD API allows us to run standard tests against
 * this module from userland, where _MINIX_SYSTEM is not defined. MINIX 3
 * specific extensions are enabled only if _MINIX_SYSTEM is defined.
 */
#include <minix/bitmap.h>
/*
 * Obtain an unsigned 32-bit value in network byte order from the pbuf chain
 * 'pbuf' at offset 'k'. The given offset is guaranteed to be within bounds.
 */
bpf_get32_ext(const struct pbuf * pbuf, uint32_t k)
	/*
	 * Find the pbuf that contains the first byte. We expect that most
	 * filters will operate only on the headers of packets, so that we
	 * mostly avoid going through this O(n) loop. Since only the superuser
	 * can open BPF devices at all, we need not be worried about abuse in
	 * this regard. However, if this loop turns out to be particularly
	 * CPU-intensive after all, we can probably improve it by caching the
	 * last visited pbuf, as read locality is likely high.
	 */
	while (k >= pbuf->len) {
	/*
	 * We assume that every pbuf has some data, but we make no assumptions
	 * about any minimum amount of data per pbuf. Therefore, we may have
	 * to take the bytes from anywhere between one and four pbufs.
	 * Hopefully the compiler will unroll this loop for us.
	 */
	val = (uint32_t)(((u_char *)pbuf->payload)[k]) << 24;
	for (i = 0; i < 3; i++) {
		if (k >= (uint32_t)pbuf->len - 1) {
		val = (val << 8) | (uint32_t)(((u_char *)pbuf->payload)[k]);
/*
 * Obtain an unsigned 16-bit value in network byte order from the pbuf chain
 * 'pbuf' at offset 'k'. The given offset is guaranteed to be within bounds.
 */
bpf_get16_ext(const struct pbuf * pbuf, uint32_t k)
	while (k >= pbuf->len) {
	/*
	 * There are only two possible cases to cover here: either the two
	 * bytes are in the same pbuf, or they are in subsequent ones.
	 */
	if (k < (uint32_t)pbuf->len - 1) {
		return ((uint32_t)(((u_char *)pbuf->payload)[k]) << 8) |
		    (uint32_t)(((u_char *)pbuf->payload)[k + 1]);
	} else {
		assert(pbuf->next != NULL);
		return ((uint32_t)(((u_char *)pbuf->payload)[k]) << 8) |
		    (uint32_t)(((u_char *)pbuf->next->payload)[0]);
/*
 * Obtain an unsigned 8-bit value from the pbuf chain 'pbuf' at offset 'k'.
 * The given offset is guaranteed to be within bounds.
 */
bpf_get8_ext(const struct pbuf * pbuf, uint32_t k)
	while (k >= pbuf->len) {
		assert(pbuf != NULL);
	return (uint32_t)(((u_char *)pbuf->payload)[k]);
#endif /* _MINIX_SYSTEM */
/*
 * Execute a BPF filter program on (the first part of) a packet, and return the
 * maximum size of the packet that should be delivered to the filter owner.
 *
 * The 'pc' parameter points to an array of BPF instructions that together form
 * the filter program to be executed. If 'pc' is NULL, the packet is fully
 * accepted. Otherwise, the given program MUST have passed a previous call to
 * bpf_validate(). Not doing so will allow for arbitrary memory access.
 *
 * The 'packet' array contains up to the whole packet. The value of 'total'
 * denotes the total length of the packet; 'len' contains the size of the array
 * 'packet'. Chunked storage of the packet is not supported at this time.
 *
 * If executing the program succeeds, the return value is the maximum number of
 * bytes from the packet to be delivered. The return value may exceed the full
 * packet size. If the number of bytes returned is zero, the packet is to be
 * ignored. If the program fails to execute properly and return a value, a
 * value of zero is returned as well, thus also indicating that the packet
 * should be ignored. This is intentional: it saves filter programs from
 * having to perform explicit checks on the packet they are filtering.
 */
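/*
 * Illustrative sketch only (not part of this module): a hypothetical caller
 * could validate and then run a minimal filter as follows. The program
 * accepts, in full, Ethernet frames whose ethertype (the 16-bit field at
 * offset 12) equals 0x0800 (IPv4), and rejects everything else. The BPF_STMT
 * and BPF_JUMP macros are the standard constructors from <net/bpf.h>; the
 * 'packet', 'total', and 'len' variables are assumed to be supplied by the
 * caller.
 *
 *	struct bpf_insn prog[] = {
 *		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, 12),
 *		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0x0800, 0, 1),
 *		BPF_STMT(BPF_RET+BPF_K, (u_int)-1),
 *		BPF_STMT(BPF_RET+BPF_K, 0),
 *	};
 *	u_int caplen = 0;
 *
 *	if (bpf_validate(prog, (int)(sizeof(prog) / sizeof(prog[0]))))
 *		caplen = bpf_filter(prog, packet, total, len);
 */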
bpf_filter(const struct bpf_insn * pc, const u_char * packet, u_int total,
    u_int len)
	return bpf_filter_ext(pc, NULL /*pbuf*/, packet, total, len);
bpf_filter_ext(const struct bpf_insn * pc, const struct pbuf * pbuf,
    const u_char * packet, u_int total, u_int len)
#endif /* _MINIX_SYSTEM */
	uint32_t k, a, x, mem[BPF_MEMWORDS];
	/* An empty program accepts all packets. */
	/*
	 * We need not clear 'mem': the checker guarantees that each memory
	 * store word is always written before it is read.
	 */
	/* Execute the program. */
		case BPF_LD+BPF_W+BPF_IND:	/* A <- P[X+k:4] */
		case BPF_LD+BPF_W+BPF_ABS:	/* A <- P[k:4] */
			/*
			 * 'k' may have any value, so check bounds in such a
			 * way that 'k' cannot possibly overflow and wrap.
			 */
			if (len >= 3 && k < len - 3)
				a = ((uint32_t)packet[k] << 24) |
				    ((uint32_t)packet[k + 1] << 16) |
				    ((uint32_t)packet[k + 2] << 8) |
				    (uint32_t)packet[k + 3];
			else if (total >= 3 && k < total - 3)
				a = bpf_get32_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
		case BPF_LD+BPF_H+BPF_IND:	/* A <- P[X+k:2] */
		case BPF_LD+BPF_H+BPF_ABS:	/* A <- P[k:2] */
			if (len >= 1 && k < len - 1)
				a = ((uint32_t)packet[k] << 8) |
				    (uint32_t)packet[k + 1];
			else if (total >= 1 && k < total - 1)
				a = bpf_get16_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
		case BPF_LD+BPF_B+BPF_IND:	/* A <- P[X+k:1] */
		case BPF_LD+BPF_B+BPF_ABS:	/* A <- P[k:1] */
				a = (uint32_t)packet[k];
				a = bpf_get8_ext(pbuf, k);
#endif /* _MINIX_SYSTEM */
		case BPF_LD+BPF_W+BPF_LEN:	/* A <- len */
		case BPF_LD+BPF_IMM:		/* A <- k */
		case BPF_LD+BPF_MEM:		/* A <- M[k] */
		case BPF_LDX+BPF_IMM:		/* X <- k */
		case BPF_LDX+BPF_MEM:		/* X <- M[k] */
		case BPF_LDX+BPF_LEN:		/* X <- len */
		case BPF_LDX+BPF_B+BPF_MSH:	/* X <- 4*(P[k:1]&0xf) */
				x = ((uint32_t)packet[k] & 0xf) << 2;
				x = (bpf_get8_ext(pbuf, k) & 0xf) << 2;
#endif /* _MINIX_SYSTEM */
		case BPF_ST:			/* M[k] <- A */
		case BPF_STX:			/* M[k] <- X */
		case BPF_ALU+BPF_ADD+BPF_K:	/* A <- A + k */
		case BPF_ALU+BPF_SUB+BPF_K:	/* A <- A - k */
		case BPF_ALU+BPF_MUL+BPF_K:	/* A <- A * k */
		case BPF_ALU+BPF_DIV+BPF_K:	/* A <- A / k */
		case BPF_ALU+BPF_MOD+BPF_K:	/* A <- A % k */
		case BPF_ALU+BPF_AND+BPF_K:	/* A <- A & k */
		case BPF_ALU+BPF_OR+BPF_K:	/* A <- A | k */
		case BPF_ALU+BPF_XOR+BPF_K:	/* A <- A ^ k */
		case BPF_ALU+BPF_LSH+BPF_K:	/* A <- A << k */
		case BPF_ALU+BPF_RSH+BPF_K:	/* A <- A >> k */
		case BPF_ALU+BPF_ADD+BPF_X:	/* A <- A + X */
		case BPF_ALU+BPF_SUB+BPF_X:	/* A <- A - X */
		case BPF_ALU+BPF_MUL+BPF_X:	/* A <- A * X */
		case BPF_ALU+BPF_DIV+BPF_X:	/* A <- A / X */
		case BPF_ALU+BPF_MOD+BPF_X:	/* A <- A % X */
		case BPF_ALU+BPF_AND+BPF_X:	/* A <- A & X */
		case BPF_ALU+BPF_OR+BPF_X:	/* A <- A | X */
		case BPF_ALU+BPF_XOR+BPF_X:	/* A <- A ^ X */
		case BPF_ALU+BPF_LSH+BPF_X:	/* A <- A << X */
		case BPF_ALU+BPF_RSH+BPF_X:	/* A <- A >> X */
		case BPF_ALU+BPF_NEG:		/* A <- -A */
		case BPF_JMP+BPF_JA:		/* pc += k */
		case BPF_JMP+BPF_JGT+BPF_K:	/* pc += (A > k) ? jt : jf */
			pc += (a > k) ? pc->jt : pc->jf;
		case BPF_JMP+BPF_JGE+BPF_K:	/* pc += (A >= k) ? jt : jf */
			pc += (a >= k) ? pc->jt : pc->jf;
		case BPF_JMP+BPF_JEQ+BPF_K:	/* pc += (A == k) ? jt : jf */
			pc += (a == k) ? pc->jt : pc->jf;
		case BPF_JMP+BPF_JSET+BPF_K:	/* pc += (A & k) ? jt : jf */
			pc += (a & k) ? pc->jt : pc->jf;
		case BPF_JMP+BPF_JGT+BPF_X:	/* pc += (A > X) ? jt : jf */
			pc += (a > x) ? pc->jt : pc->jf;
		case BPF_JMP+BPF_JGE+BPF_X:	/* pc += (A >= X) ? jt : jf */
			pc += (a >= x) ? pc->jt : pc->jf;
		case BPF_JMP+BPF_JEQ+BPF_X:	/* pc += (A == X) ? jt : jf */
			pc += (a == x) ? pc->jt : pc->jf;
		case BPF_JMP+BPF_JSET+BPF_X:	/* pc += (A & X) ? jt : jf */
			pc += (a & x) ? pc->jt : pc->jf;
		case BPF_RET+BPF_A:		/* accept A bytes */
		case BPF_RET+BPF_K:		/* accept K bytes */
		case BPF_MISC+BPF_TAX:		/* X <- A */
		case BPF_MISC+BPF_TXA:		/* A <- X */
		default:			/* unknown instruction */
/*
 * In order to avoid having to perform explicit memory allocation, we store
 * some validation state on the stack, using data types that are as small as
 * possible for the current definitions. The data types, and in fact the whole
 * assumption that we can store the state on the stack, may need to be revised
 * if certain constants are increased in the future. As of writing, the
 * validation routine uses a little over 1KB of stack memory.
 */
#if BPF_MEMWORDS <= 16		/* value as of writing: 16 */
typedef uint16_t meminv_t;
#else
#error "increased BPF_MEMWORDS may require code revision"
#endif
#if BPF_MAXINSNS > 2048		/* value as of writing: 512 */
#error "increased BPF_MAXINSNS may require code revision"
#endif
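/*
 * As a rough worked estimate (not an exact figure) of why the stack usage
 * stays around 1KB with the values above: 'meminv' occupies BPF_MAXINSNS
 * (512) entries of sizeof(meminv_t) (2) bytes = 1024 bytes, and 'reachable'
 * occupies one bit per instruction, i.e., 512 bits = 64 bytes, assuming a
 * 32-bit bitchunk_t. Together with a few scalar locals, that accounts for
 * the "little over 1KB" mentioned above.
 */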
/*
 * Verify that the given filter program is safe to execute, by performing as
 * many static validity checks as possible. The program is given as 'insns',
 * which must be an array of 'ninsns' BPF instructions. Unlike bpf_filter(),
 * this function does not accept empty filter programs. The function returns 1
 * if the program was successfully validated, or 0 if the program should not be
 * executed.
 */
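/*
 * For illustration only (a hypothetical program, not taken from real use):
 * the three-instruction program below is rejected by the memory-store check,
 * because it loads scratch memory word M[0] into X even though no execution
 * path leading up to the load has stored anything there; accepting it would
 * let the filter read an uninitialized stack word at run time.
 *
 *	struct bpf_insn bad[] = {
 *		BPF_STMT(BPF_LDX+BPF_MEM, 0),
 *		BPF_STMT(BPF_MISC+BPF_TXA, 0),
 *		BPF_STMT(BPF_RET+BPF_A, 0),
 *	};
 *
 *	assert(bpf_validate(bad, 3) == 0);
 */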
bpf_validate(const struct bpf_insn * insns, int ninsns)
	bitchunk_t reachable[BITMAP_CHUNKS(BPF_MAXINSNS)];
	meminv_t invalid, meminv[BPF_MAXINSNS];
	const struct bpf_insn *insn;
	u_int pc, count, target;
	if (insns == NULL || ninsns <= 0 || ninsns > BPF_MAXINSNS)
		return 0;
	count = (u_int)ninsns;
	memset(reachable, 0, sizeof(reachable[0]) * BITMAP_CHUNKS(count));
	memset(meminv, 0, sizeof(meminv[0]) * count);
	SET_BIT(reachable, 0);
	meminv[0] = (meminv_t)~0;
	for (pc = 0; pc < count; pc++) {
		/* We completely ignore instructions that are not reachable. */
		if (!GET_BIT(reachable, pc))
			continue;
		invalid = meminv[pc];
		switch (insn->code) {
		case BPF_LD+BPF_W+BPF_ABS:
		case BPF_LD+BPF_H+BPF_ABS:
		case BPF_LD+BPF_B+BPF_ABS:
		case BPF_LD+BPF_W+BPF_IND:
		case BPF_LD+BPF_H+BPF_IND:
		case BPF_LD+BPF_B+BPF_IND:
		case BPF_LDX+BPF_IMM:
		case BPF_LDX+BPF_LEN:
		case BPF_LDX+BPF_B+BPF_MSH:
		case BPF_ALU+BPF_ADD+BPF_K:
		case BPF_ALU+BPF_SUB+BPF_K:
		case BPF_ALU+BPF_MUL+BPF_K:
		case BPF_ALU+BPF_AND+BPF_K:
		case BPF_ALU+BPF_OR+BPF_K:
		case BPF_ALU+BPF_XOR+BPF_K:
		case BPF_ALU+BPF_ADD+BPF_X:
		case BPF_ALU+BPF_SUB+BPF_X:
		case BPF_ALU+BPF_MUL+BPF_X:
		case BPF_ALU+BPF_DIV+BPF_X:
		case BPF_ALU+BPF_MOD+BPF_X:
		case BPF_ALU+BPF_AND+BPF_X:
		case BPF_ALU+BPF_OR+BPF_X:
		case BPF_ALU+BPF_XOR+BPF_X:
		case BPF_ALU+BPF_LSH+BPF_X:
		case BPF_ALU+BPF_RSH+BPF_X:
		case BPF_ALU+BPF_NEG:
		case BPF_MISC+BPF_TAX:
		case BPF_MISC+BPF_TXA:
			/* Nothing we can check for these. */
		case BPF_ALU+BPF_DIV+BPF_K:
		case BPF_ALU+BPF_MOD+BPF_K:
			/* No division by zero. */
		case BPF_ALU+BPF_LSH+BPF_K:
		case BPF_ALU+BPF_RSH+BPF_K:
			/* Do not invoke undefined behavior. */
		case BPF_LDX+BPF_MEM:
			/*
			 * Only allow loading words that have been stored in
			 * all execution paths leading up to this instruction.
			 */
			if (insn->k >= BPF_MEMWORDS ||
			    (invalid & (1 << insn->k)))
				return 0;
			if (insn->k >= BPF_MEMWORDS)
				return 0;
			invalid &= ~(1 << insn->k);
			/*
			 * Make sure that the target instruction of the jump is
			 * still part of the program, and mark it as reachable.
			 */
			if (insn->k >= count - pc - 1)
				return 0;
			target = pc + insn->k + 1;
			SET_BIT(reachable, target);
			meminv[target] |= invalid;
		case BPF_JMP+BPF_JGT+BPF_K:
		case BPF_JMP+BPF_JGE+BPF_K:
		case BPF_JMP+BPF_JEQ+BPF_K:
		case BPF_JMP+BPF_JSET+BPF_K:
		case BPF_JMP+BPF_JGT+BPF_X:
		case BPF_JMP+BPF_JGE+BPF_X:
		case BPF_JMP+BPF_JEQ+BPF_X:
		case BPF_JMP+BPF_JSET+BPF_X:
			/*
			 * Make sure that both target instructions are still
			 * part of the program, and mark both as reachable.
			 * There is no chance that the additions will overflow.
			 */
			target = pc + insn->jt + 1;
			SET_BIT(reachable, target);
			meminv[target] |= invalid;

			target = pc + insn->jf + 1;
			SET_BIT(reachable, target);
			meminv[target] |= invalid;
		 * After most instructions, we simply advance to the next. For
		 * one thing, this means that there must be a next instruction
		SET_BIT(reachable, pc + 1);
		meminv[pc + 1] |= invalid;
	/* The program has passed all our basic tests. */