Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / io / pfmod.c
blobc9c452e6efe81f119a6ea019d43967c097c4491b
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
/*
 * STREAMS Packet Filter Module
 *
 * This module applies a filter to messages arriving on its read
 * queue, passing on messages that the filter accepts and discarding
 * the others.  It supports ioctls for setting the filter.
 *
 * On the write side, the module simply passes everything through
 * unchanged.
 *
 * Based on SunOS 4.x version.  This version has minor changes:
 *	- general SVR4 porting stuff
 *	- change name and prefixes from "nit" buffer to streams buffer
 *	- multithreading assumes configured as D_MTQPAIR
 */
42 #include <sys/types.h>
43 #include <sys/sysmacros.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/time.h>
47 #include <sys/stropts.h>
48 #include <sys/stream.h>
49 #include <sys/conf.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/kmem.h>
53 #include <sys/strsun.h>
54 #include <sys/pfmod.h>
55 #include <sys/modctl.h>
58 * Expanded version of the Packetfilt structure that includes
59 * some additional fields that aid filter execution efficiency.
61 struct epacketfilt {
62 struct Pf_ext_packetfilt pf;
63 #define pf_Priority pf.Pf_Priority
64 #define pf_FilterLen pf.Pf_FilterLen
65 #define pf_Filter pf.Pf_Filter
66 /* pointer to word immediately past end of filter */
67 ushort_t *pf_FilterEnd;
68 /* length in bytes of packet prefix the filter examines */
69 ushort_t pf_PByteLen;
73 * (Internal) packet descriptor for FilterPacket
75 struct packdesc {
76 ushort_t *pd_hdr; /* header starting address */
77 uint_t pd_hdrlen; /* header length in shorts */
78 ushort_t *pd_body; /* body starting address */
79 uint_t pd_bodylen; /* body length in shorts */
84 * Function prototypes.
86 static int pfopen(queue_t *, dev_t *, int, int, cred_t *);
87 static int pfclose(queue_t *, int, cred_t *);
88 static void pfioctl(queue_t *wq, mblk_t *mp);
89 static int FilterPacket(struct packdesc *, struct epacketfilt *);
91 * To save instructions, since STREAMS ignores the return value
92 * from these functions, they are defined as void here. Kind of icky, but...
94 static void pfwput(queue_t *, mblk_t *);
95 static void pfrput(queue_t *, mblk_t *);
97 static struct module_info pf_minfo = {
98 22, /* mi_idnum */
99 "pfmod", /* mi_idname */
100 0, /* mi_minpsz */
101 INFPSZ, /* mi_maxpsz */
102 0, /* mi_hiwat */
103 0 /* mi_lowat */
106 static struct qinit pf_rinit = {
107 (int (*)())pfrput, /* qi_putp */
108 NULL,
109 pfopen, /* qi_qopen */
110 pfclose, /* qi_qclose */
111 NULL, /* qi_qadmin */
112 &pf_minfo, /* qi_minfo */
113 NULL /* qi_mstat */
116 static struct qinit pf_winit = {
117 (int (*)())pfwput, /* qi_putp */
118 NULL, /* qi_srvp */
119 NULL, /* qi_qopen */
120 NULL, /* qi_qclose */
121 NULL, /* qi_qadmin */
122 &pf_minfo, /* qi_minfo */
123 NULL /* qi_mstat */
126 static struct streamtab pf_info = {
127 &pf_rinit, /* st_rdinit */
128 &pf_winit, /* st_wrinit */
129 NULL, /* st_muxrinit */
130 NULL /* st_muxwinit */
133 static struct fmodsw fsw = {
134 "pfmod",
135 &pf_info,
136 D_MTQPAIR | D_MP
139 static struct modlstrmod modlstrmod = {
140 &mod_strmodops, "streams packet filter module", &fsw
143 static struct modlinkage modlinkage = {
144 MODREV_1, &modlstrmod, NULL
148 _init(void)
150 return (mod_install(&modlinkage));
154 _fini(void)
156 return (mod_remove(&modlinkage));
160 _info(struct modinfo *modinfop)
162 return (mod_info(&modlinkage, modinfop));
165 /*ARGSUSED*/
166 static int
167 pfopen(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
169 struct epacketfilt *pfp;
171 ASSERT(rq);
173 if (sflag != MODOPEN)
174 return (EINVAL);
176 if (rq->q_ptr)
177 return (0);
180 * Allocate and initialize per-Stream structure.
182 pfp = kmem_alloc(sizeof (struct epacketfilt), KM_SLEEP);
183 rq->q_ptr = WR(rq)->q_ptr = (char *)pfp;
185 qprocson(rq);
187 return (0);
190 /* ARGSUSED */
191 static int
192 pfclose(queue_t *rq, int flags __unused, cred_t *credp __unused)
194 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr;
196 ASSERT(pfp);
198 qprocsoff(rq);
200 kmem_free(pfp, sizeof (struct epacketfilt));
201 rq->q_ptr = WR(rq)->q_ptr = NULL;
203 return (0);
207 * Write-side put procedure. Its main task is to detect ioctls.
208 * Other message types are passed on through.
210 static void
211 pfwput(queue_t *wq, mblk_t *mp)
213 switch (mp->b_datap->db_type) {
214 case M_IOCTL:
215 pfioctl(wq, mp);
216 break;
218 default:
219 putnext(wq, mp);
220 break;
225 * Read-side put procedure. It's responsible for applying the
226 * packet filter and passing upstream message on or discarding it
227 * depending upon the results.
229 * Upstream messages can start with zero or more M_PROTO mblks
230 * which are skipped over before executing the packet filter
231 * on any remaining M_DATA mblks.
233 static void
234 pfrput(queue_t *rq, mblk_t *mp)
236 struct epacketfilt *pfp = (struct epacketfilt *)rq->q_ptr;
237 mblk_t *mbp, *mpp;
238 struct packdesc pd;
239 int need;
241 ASSERT(pfp);
243 switch (DB_TYPE(mp)) {
244 case M_PROTO:
245 case M_DATA:
247 * Skip over protocol information and find the start
248 * of the message body, saving the overall message
249 * start in mpp.
251 for (mpp = mp; mp && (DB_TYPE(mp) == M_PROTO); mp = mp->b_cont)
255 * Null body (exclusive of M_PROTO blocks) ==> accept.
256 * Note that a null body is not the same as an empty body.
258 if (mp == NULL) {
259 putnext(rq, mpp);
260 break;
264 * Pull the packet up to the length required by
265 * the filter. Note that doing so destroys sharing
266 * relationships, which is unfortunate, since the
267 * results of pulling up here are likely to be useful
268 * for shared messages applied to a filter on a sibling
269 * stream.
271 * Most packet sources will provide the packet in two
272 * logical pieces: an initial header in a single mblk,
273 * and a body in a sequence of mblks hooked to the
274 * header. We're prepared to deal with variant forms,
275 * but in any case, the pullup applies only to the body
276 * part.
278 mbp = mp->b_cont;
279 need = pfp->pf_PByteLen;
280 if (mbp && (MBLKL(mbp) < need)) {
281 int len = msgdsize(mbp);
283 /* XXX discard silently on pullupmsg failure */
284 if (pullupmsg(mbp, MIN(need, len)) == 0) {
285 freemsg(mpp);
286 break;
291 * Misalignment (not on short boundary) ==> reject.
293 if (((uintptr_t)mp->b_rptr & (sizeof (ushort_t) - 1)) ||
294 (mbp != NULL &&
295 ((uintptr_t)mbp->b_rptr & (sizeof (ushort_t) - 1)))) {
296 freemsg(mpp);
297 break;
301 * These assignments are distasteful, but necessary,
302 * since the packet filter wants to work in terms of
303 * shorts. Odd bytes at the end of header or data can't
304 * participate in the filtering operation.
306 pd.pd_hdr = (ushort_t *)mp->b_rptr;
307 pd.pd_hdrlen = (mp->b_wptr - mp->b_rptr) / sizeof (ushort_t);
308 if (mbp) {
309 pd.pd_body = (ushort_t *)mbp->b_rptr;
310 pd.pd_bodylen = (mbp->b_wptr - mbp->b_rptr) /
311 sizeof (ushort_t);
312 } else {
313 pd.pd_body = NULL;
314 pd.pd_bodylen = 0;
318 * Apply the filter.
320 if (FilterPacket(&pd, pfp))
321 putnext(rq, mpp);
322 else
323 freemsg(mpp);
325 break;
327 default:
328 putnext(rq, mp);
329 break;
335 * Handle write-side M_IOCTL messages.
337 static void
338 pfioctl(queue_t *wq, mblk_t *mp)
340 struct epacketfilt *pfp = (struct epacketfilt *)wq->q_ptr;
341 struct Pf_ext_packetfilt *upfp;
342 struct packetfilt *opfp;
343 ushort_t *fwp;
344 int arg;
345 int maxoff = 0;
346 int maxoffreg = 0;
347 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
348 int error;
350 switch (iocp->ioc_cmd) {
351 case PFIOCSETF:
353 * Verify argument length. Since the size of packet filter
354 * got increased (ENMAXFILTERS was bumped up to 2047), to
355 * maintain backwards binary compatibility, we need to
356 * check for both possible sizes.
358 switch (iocp->ioc_count) {
359 case sizeof (struct Pf_ext_packetfilt):
360 error = miocpullup(mp,
361 sizeof (struct Pf_ext_packetfilt));
362 if (error != 0) {
363 miocnak(wq, mp, 0, error);
364 return;
366 upfp = (struct Pf_ext_packetfilt *)mp->b_cont->b_rptr;
367 if (upfp->Pf_FilterLen > PF_MAXFILTERS) {
368 miocnak(wq, mp, 0, EINVAL);
369 return;
372 bcopy(upfp, pfp, sizeof (struct Pf_ext_packetfilt));
373 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
374 break;
376 case sizeof (struct packetfilt):
377 error = miocpullup(mp, sizeof (struct packetfilt));
378 if (error != 0) {
379 miocnak(wq, mp, 0, error);
380 return;
382 opfp = (struct packetfilt *)mp->b_cont->b_rptr;
383 /* this strange comparison keeps gcc from complaining */
384 if (opfp->Pf_FilterLen - 1 >= ENMAXFILTERS) {
385 miocnak(wq, mp, 0, EINVAL);
386 return;
389 pfp->pf.Pf_Priority = opfp->Pf_Priority;
390 pfp->pf.Pf_FilterLen = (unsigned int)opfp->Pf_FilterLen;
392 bcopy(opfp->Pf_Filter, pfp->pf.Pf_Filter,
393 sizeof (opfp->Pf_Filter));
394 pfp->pf_FilterEnd = &pfp->pf_Filter[pfp->pf_FilterLen];
395 break;
397 default:
398 miocnak(wq, mp, 0, EINVAL);
399 return;
403 * Find and record maximum byte offset that the
404 * filter users. We use this when executing the
405 * filter to determine how much of the packet
406 * body to pull up. This code depends on the
407 * filter encoding.
409 for (fwp = pfp->pf_Filter; fwp < pfp->pf_FilterEnd; fwp++) {
410 arg = *fwp & ((1 << ENF_NBPA) - 1);
411 switch (arg) {
412 default:
413 if ((arg -= ENF_PUSHWORD) > maxoff)
414 maxoff = arg;
415 break;
417 case ENF_LOAD_OFFSET:
418 /* Point to the offset */
419 fwp++;
420 if (*fwp > maxoffreg)
421 maxoffreg = *fwp;
422 break;
424 case ENF_PUSHLIT:
425 case ENF_BRTR:
426 case ENF_BRFL:
427 /* Skip over the literal. */
428 fwp++;
429 break;
431 case ENF_PUSHZERO:
432 case ENF_PUSHONE:
433 case ENF_PUSHFFFF:
434 case ENF_PUSHFF00:
435 case ENF_PUSH00FF:
436 case ENF_NOPUSH:
437 case ENF_POP:
438 break;
443 * Convert word offset to length in bytes.
445 pfp->pf_PByteLen = (maxoff + maxoffreg + 1) * sizeof (ushort_t);
446 miocack(wq, mp, 0, 0);
447 break;
449 default:
450 putnext(wq, mp);
451 break;
/*
 * Tracing for the filter interpreter.  enprintf() takes a complete,
 * parenthesized printf() argument list; it expands to nothing unless
 * INNERDEBUG is defined.
 */
/* #define DEBUG 1 */
/* #define INNERDEBUG 1 */

#ifndef INNERDEBUG
#define	enprintf(a)
#else
#define	enprintf(a)	printf a
#endif
465 * Apply the packet filter given by pfp to the packet given by
466 * pp. Return nonzero iff the filter accepts the packet.
468 * The packet comes in two pieces, a header and a body, since
469 * that's the most convenient form for our caller. The header
470 * is in contiguous memory, whereas the body is in a mbuf.
471 * Our caller will have adjusted the mbuf chain so that its first
472 * min(MLEN, length(body)) bytes are guaranteed contiguous. For
473 * the sake of efficiency (and some laziness) the filter is prepared
474 * to examine only these two contiguous pieces. Furthermore, it
475 * assumes that the header length is even, so that there's no need
476 * to glue the last byte of header to the first byte of data.
479 #define opx(i) ((i) >> ENF_NBPA)
481 static int
482 FilterPacket(struct packdesc *pp, struct epacketfilt *pfp)
484 int maxhdr = pp->pd_hdrlen;
485 int maxword = maxhdr + pp->pd_bodylen;
486 ushort_t *sp;
487 ushort_t *fp;
488 ushort_t *fpe;
489 unsigned op;
490 unsigned arg;
491 unsigned offreg = 0;
492 ushort_t stack[ENMAXFILTERS+1];
494 fp = &pfp->pf_Filter[0];
495 fpe = pfp->pf_FilterEnd;
497 enprintf(("FilterPacket(%p, %p, %p, %p):\n", pp, pfp, fp, fpe));
500 * Push TRUE on stack to start. The stack size is chosen such
501 * that overflow can't occur -- each operation can push at most
502 * one item on the stack, and the stack size equals the maximum
503 * program length.
505 sp = &stack[ENMAXFILTERS];
506 *sp = 1;
508 while (fp < fpe) {
509 op = *fp >> ENF_NBPA;
510 arg = *fp & ((1 << ENF_NBPA) - 1);
511 fp++;
513 switch (arg) {
514 default:
515 arg -= ENF_PUSHWORD;
517 * Since arg is unsigned,
518 * if it were less than ENF_PUSHWORD before,
519 * it would now be huge.
521 if (arg + offreg < maxhdr)
522 *--sp = pp->pd_hdr[arg + offreg];
523 else if (arg + offreg < maxword)
524 *--sp = pp->pd_body[arg - maxhdr + offreg];
525 else {
526 enprintf(("=>0(len)\n"));
527 return (0);
529 break;
530 case ENF_PUSHLIT:
531 *--sp = *fp++;
532 break;
533 case ENF_PUSHZERO:
534 *--sp = 0;
535 break;
536 case ENF_PUSHONE:
537 *--sp = 1;
538 break;
539 case ENF_PUSHFFFF:
540 *--sp = 0xffff;
541 break;
542 case ENF_PUSHFF00:
543 *--sp = 0xff00;
544 break;
545 case ENF_PUSH00FF:
546 *--sp = 0x00ff;
547 break;
548 case ENF_LOAD_OFFSET:
549 offreg = *fp++;
550 break;
551 case ENF_BRTR:
552 if (*sp != 0)
553 fp += *fp;
554 else
555 fp++;
556 if (fp >= fpe) {
557 enprintf(("BRTR: fp>=fpe\n"));
558 return (0);
560 break;
561 case ENF_BRFL:
562 if (*sp == 0)
563 fp += *fp;
564 else
565 fp++;
566 if (fp >= fpe) {
567 enprintf(("BRFL: fp>=fpe\n"));
568 return (0);
570 break;
571 case ENF_POP:
572 ++sp;
573 if (sp > &stack[ENMAXFILTERS]) {
574 enprintf(("stack underflow\n"));
575 return (0);
577 break;
578 case ENF_NOPUSH:
579 break;
582 if (sp < &stack[2]) { /* check stack overflow: small yellow zone */
583 enprintf(("=>0(--sp)\n"));
584 return (0);
587 if (op == ENF_NOP)
588 continue;
591 * all non-NOP operators binary, must have at least two operands
592 * on stack to evaluate.
594 if (sp > &stack[ENMAXFILTERS-2]) {
595 enprintf(("=>0(sp++)\n"));
596 return (0);
599 arg = *sp++;
600 switch (op) {
601 default:
602 enprintf(("=>0(def)\n"));
603 return (0);
604 case opx(ENF_AND):
605 *sp &= arg;
606 break;
607 case opx(ENF_OR):
608 *sp |= arg;
609 break;
610 case opx(ENF_XOR):
611 *sp ^= arg;
612 break;
613 case opx(ENF_EQ):
614 *sp = (*sp == arg);
615 break;
616 case opx(ENF_NEQ):
617 *sp = (*sp != arg);
618 break;
619 case opx(ENF_LT):
620 *sp = (*sp < arg);
621 break;
622 case opx(ENF_LE):
623 *sp = (*sp <= arg);
624 break;
625 case opx(ENF_GT):
626 *sp = (*sp > arg);
627 break;
628 case opx(ENF_GE):
629 *sp = (*sp >= arg);
630 break;
632 /* short-circuit operators */
634 case opx(ENF_COR):
635 if (*sp++ == arg) {
636 enprintf(("=>COR %x\n", *sp));
637 return (1);
639 break;
640 case opx(ENF_CAND):
641 if (*sp++ != arg) {
642 enprintf(("=>CAND %x\n", *sp));
643 return (0);
645 break;
646 case opx(ENF_CNOR):
647 if (*sp++ == arg) {
648 enprintf(("=>COR %x\n", *sp));
649 return (0);
651 break;
652 case opx(ENF_CNAND):
653 if (*sp++ != arg) {
654 enprintf(("=>CNAND %x\n", *sp));
655 return (1);
657 break;
660 enprintf(("=>%x\n", *sp));
661 return (*sp);