Try to fixup the mess of mdoc(7)/man(7) mixture as created by the merge.
[netbsd-mini2440.git] / sys / altq / altq_blue.c
blob7d48942352e57479c601cbc02db3670266da68f9
1 /* $NetBSD: altq_blue.c,v 1.21 2006/11/16 01:32:37 christos Exp $ */
2 /* $KAME: altq_blue.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */
4 /*
5 * Copyright (C) 1997-2002
6 * Sony Computer Science Laboratories Inc. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_blue.c,v 1.21 2006/11/16 01:32:37 christos Exp $");
66 #ifdef _KERNEL_OPT
67 #include "opt_altq.h"
68 #include "opt_inet.h"
69 #endif
71 #ifdef ALTQ_BLUE /* blue is enabled by ALTQ_BLUE option in opt_altq.h */
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/sockio.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/errno.h>
81 #include <sys/kernel.h>
82 #include <sys/kauth.h>
84 #include <net/if.h>
85 #include <net/if_types.h>
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #ifdef INET6
90 #include <netinet/ip6.h>
91 #endif
93 #include <altq/altq.h>
94 #include <altq/altq_conf.h>
95 #include <altq/altq_blue.h>
97 #ifdef ALTQ3_COMPAT
/*
 * Blue is proposed and implemented by Wu-chang Feng <wuchang@eecs.umich.edu>.
 * More information on Blue is available from
 * http://www.eecs.umich.edu/~wuchang/blue/
 */
104 /* fixed-point uses 12-bit decimal places */
105 #define FP_SHIFT 12 /* fixed-point shift */
107 #define BLUE_LIMIT 200 /* default max queue lenght */
108 #define BLUE_STATS /* collect statistics */
110 /* blue_list keeps all blue_state_t's allocated. */
111 static blue_queue_t *blue_list = NULL;
113 /* internal function prototypes */
114 static int blue_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
115 static struct mbuf *blue_dequeue(struct ifaltq *, int);
116 static int drop_early(blue_t *);
117 static int mark_ecn(struct mbuf *, struct altq_pktattr *, int);
118 static int blue_detach(blue_queue_t *);
119 static int blue_request(struct ifaltq *, int, void *);
122 * blue device interface
124 altqdev_decl(blue);
127 blueopen(dev_t dev, int flag, int fmt,
128 struct lwp *l)
130 /* everything will be done when the queueing scheme is attached. */
131 return 0;
135 blueclose(dev_t dev, int flag, int fmt,
136 struct lwp *l)
138 blue_queue_t *rqp;
139 int err, error = 0;
141 while ((rqp = blue_list) != NULL) {
142 /* destroy all */
143 err = blue_detach(rqp);
144 if (err != 0 && error == 0)
145 error = err;
148 return error;
152 blueioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
153 struct lwp *l)
155 blue_queue_t *rqp;
156 struct blue_interface *ifacep;
157 struct ifnet *ifp;
158 int error = 0;
160 /* check super-user privilege */
161 switch (cmd) {
162 case BLUE_GETSTATS:
163 break;
164 default:
165 #if (__FreeBSD_version > 400000)
166 if ((error = suser(p)) != 0)
167 return (error);
168 #else
169 if ((error = kauth_authorize_network(l->l_cred,
170 KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_BLUE, NULL,
171 NULL, NULL)) != 0)
172 return (error);
173 #endif
174 break;
177 switch (cmd) {
179 case BLUE_ENABLE:
180 ifacep = (struct blue_interface *)addr;
181 if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
182 error = EBADF;
183 break;
185 error = altq_enable(rqp->rq_ifq);
186 break;
188 case BLUE_DISABLE:
189 ifacep = (struct blue_interface *)addr;
190 if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
191 error = EBADF;
192 break;
194 error = altq_disable(rqp->rq_ifq);
195 break;
197 case BLUE_IF_ATTACH:
198 ifp = ifunit(((struct blue_interface *)addr)->blue_ifname);
199 if (ifp == NULL) {
200 error = ENXIO;
201 break;
204 /* allocate and initialize blue_state_t */
205 rqp = malloc(sizeof(blue_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
206 if (rqp == NULL) {
207 error = ENOMEM;
208 break;
211 rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
212 M_WAITOK|M_ZERO);
213 if (rqp->rq_q == NULL) {
214 free(rqp, M_DEVBUF);
215 error = ENOMEM;
216 break;
219 rqp->rq_blue = malloc(sizeof(blue_t), M_DEVBUF,
220 M_WAITOK|M_ZERO);
221 if (rqp->rq_blue == NULL) {
222 free(rqp->rq_q, M_DEVBUF);
223 free(rqp, M_DEVBUF);
224 error = ENOMEM;
225 break;
228 rqp->rq_ifq = &ifp->if_snd;
229 qtail(rqp->rq_q) = NULL;
230 qlen(rqp->rq_q) = 0;
231 qlimit(rqp->rq_q) = BLUE_LIMIT;
233 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
234 blue_init(rqp->rq_blue, 0, 800, 1000, 50000);
237 * set BLUE to this ifnet structure.
239 error = altq_attach(rqp->rq_ifq, ALTQT_BLUE, rqp,
240 blue_enqueue, blue_dequeue, blue_request,
241 NULL, NULL);
242 if (error) {
243 free(rqp->rq_blue, M_DEVBUF);
244 free(rqp->rq_q, M_DEVBUF);
245 free(rqp, M_DEVBUF);
246 break;
249 /* add this state to the blue list */
250 rqp->rq_next = blue_list;
251 blue_list = rqp;
252 break;
254 case BLUE_IF_DETACH:
255 ifacep = (struct blue_interface *)addr;
256 if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
257 error = EBADF;
258 break;
260 error = blue_detach(rqp);
261 break;
263 case BLUE_GETSTATS:
264 do {
265 struct blue_stats *q_stats;
266 blue_t *rp;
268 q_stats = (struct blue_stats *)addr;
269 if ((rqp = altq_lookup(q_stats->iface.blue_ifname,
270 ALTQT_BLUE)) == NULL) {
271 error = EBADF;
272 break;
275 q_stats->q_len = qlen(rqp->rq_q);
276 q_stats->q_limit = qlimit(rqp->rq_q);
278 rp = rqp->rq_blue;
279 q_stats->q_pmark = rp->blue_pmark;
280 q_stats->xmit_packets = rp->blue_stats.xmit_packets;
281 q_stats->xmit_bytes = rp->blue_stats.xmit_bytes;
282 q_stats->drop_packets = rp->blue_stats.drop_packets;
283 q_stats->drop_bytes = rp->blue_stats.drop_bytes;
284 q_stats->drop_forced = rp->blue_stats.drop_forced;
285 q_stats->drop_unforced = rp->blue_stats.drop_unforced;
286 q_stats->marked_packets = rp->blue_stats.marked_packets;
288 } while (/*CONSTCOND*/ 0);
289 break;
291 case BLUE_CONFIG:
292 do {
293 struct blue_conf *fc;
294 int limit;
296 fc = (struct blue_conf *)addr;
297 if ((rqp = altq_lookup(fc->iface.blue_ifname,
298 ALTQT_BLUE)) == NULL) {
299 error = EBADF;
300 break;
302 limit = fc->blue_limit;
303 qlimit(rqp->rq_q) = limit;
304 fc->blue_limit = limit; /* write back the new value */
305 if (fc->blue_pkttime > 0)
306 rqp->rq_blue->blue_pkttime = fc->blue_pkttime;
307 if (fc->blue_max_pmark > 0)
308 rqp->rq_blue->blue_max_pmark = fc->blue_max_pmark;
309 if (fc->blue_hold_time > 0)
310 rqp->rq_blue->blue_hold_time = fc->blue_hold_time;
311 rqp->rq_blue->blue_flags = fc->blue_flags;
313 blue_init(rqp->rq_blue, rqp->rq_blue->blue_flags,
314 rqp->rq_blue->blue_pkttime,
315 rqp->rq_blue->blue_max_pmark,
316 rqp->rq_blue->blue_hold_time);
317 } while (/*CONSTCOND*/ 0);
318 break;
320 default:
321 error = EINVAL;
322 break;
324 return error;
327 static int
328 blue_detach(blue_queue_t *rqp)
330 blue_queue_t *tmp;
331 int error = 0;
333 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
334 altq_disable(rqp->rq_ifq);
336 if ((error = altq_detach(rqp->rq_ifq)))
337 return (error);
339 if (blue_list == rqp)
340 blue_list = rqp->rq_next;
341 else {
342 for (tmp = blue_list; tmp != NULL; tmp = tmp->rq_next)
343 if (tmp->rq_next == rqp) {
344 tmp->rq_next = rqp->rq_next;
345 break;
347 if (tmp == NULL)
348 printf("blue_detach: no state found in blue_list!\n");
351 free(rqp->rq_q, M_DEVBUF);
352 free(rqp->rq_blue, M_DEVBUF);
353 free(rqp, M_DEVBUF);
354 return (error);
358 * blue support routines
362 blue_init(blue_t *rp, int flags, int pkttime, int blue_max_pmark,
363 int blue_hold_time)
365 int npkts_per_sec;
367 rp->blue_idle = 1;
368 rp->blue_flags = flags;
369 rp->blue_pkttime = pkttime;
370 rp->blue_max_pmark = blue_max_pmark;
371 rp->blue_hold_time = blue_hold_time;
372 if (pkttime == 0)
373 rp->blue_pkttime = 1;
375 /* when the link is very slow, adjust blue parameters */
376 npkts_per_sec = 1000000 / rp->blue_pkttime;
377 if (npkts_per_sec < 50) {
379 else if (npkts_per_sec < 300) {
382 microtime(&rp->blue_last);
383 return (0);
387 * enqueue routine:
389 * returns: 0 when successfully queued.
390 * ENOBUFS when drop occurs.
392 static int
393 blue_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
395 blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
396 int error = 0;
398 if (blue_addq(rqp->rq_blue, rqp->rq_q, m, pktattr) == 0)
399 ifq->ifq_len++;
400 else
401 error = ENOBUFS;
402 return error;
405 #define DTYPE_NODROP 0 /* no drop */
406 #define DTYPE_FORCED 1 /* a "forced" drop */
407 #define DTYPE_EARLY 2 /* an "unforced" (early) drop */
410 blue_addq(blue_t *rp, class_queue_t *q, struct mbuf *m,
411 struct altq_pktattr *pktattr)
413 int droptype;
416 * if we were idle, this is an enqueue onto an empty queue
417 * and we should decrement marking probability
420 if (rp->blue_idle) {
421 struct timeval now;
422 int t;
423 rp->blue_idle = 0;
424 microtime(&now);
425 t = (now.tv_sec - rp->blue_last.tv_sec);
426 if ( t > 1) {
427 rp->blue_pmark = 1;
428 microtime(&rp->blue_last);
429 } else {
430 t = t * 1000000 + (now.tv_usec - rp->blue_last.tv_usec);
431 if (t > rp->blue_hold_time) {
432 rp->blue_pmark--;
433 if (rp->blue_pmark < 0) rp->blue_pmark = 0;
434 microtime(&rp->blue_last);
439 /* see if we drop early */
440 droptype = DTYPE_NODROP;
441 if (drop_early(rp) && qlen(q) > 1) {
442 /* mark or drop by blue */
443 if ((rp->blue_flags & BLUEF_ECN) &&
444 mark_ecn(m, pktattr, rp->blue_flags)) {
445 /* successfully marked. do not drop. */
446 #ifdef BLUE_STATS
447 rp->blue_stats.marked_packets++;
448 #endif
449 } else {
450 /* unforced drop by blue */
451 droptype = DTYPE_EARLY;
456 * if the queue length hits the hard limit, it's a forced drop.
458 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
459 droptype = DTYPE_FORCED;
461 /* if successful or forced drop, enqueue this packet. */
462 if (droptype != DTYPE_EARLY)
463 _addq(q, m);
465 if (droptype != DTYPE_NODROP) {
466 if (droptype == DTYPE_EARLY) {
467 /* drop the incoming packet */
468 #ifdef BLUE_STATS
469 rp->blue_stats.drop_unforced++;
470 #endif
471 } else {
472 struct timeval now;
473 int t;
474 /* forced drop, select a victim packet in the queue. */
475 m = _getq_random(q);
476 microtime(&now);
477 t = (now.tv_sec - rp->blue_last.tv_sec);
478 t = t * 1000000 + (now.tv_usec - rp->blue_last.tv_usec);
479 if (t > rp->blue_hold_time) {
480 rp->blue_pmark += rp->blue_max_pmark >> 3;
481 if (rp->blue_pmark > rp->blue_max_pmark)
482 rp->blue_pmark = rp->blue_max_pmark;
483 microtime(&rp->blue_last);
485 #ifdef BLUE_STATS
486 rp->blue_stats.drop_forced++;
487 #endif
489 #ifdef BLUE_STATS
490 rp->blue_stats.drop_packets++;
491 rp->blue_stats.drop_bytes += m->m_pkthdr.len;
492 #endif
493 m_freem(m);
494 return (-1);
496 /* successfully queued */
497 return (0);
501 * early-drop probability is kept in blue_pmark
504 static int
505 drop_early(blue_t *rp)
507 if ((arc4random() % rp->blue_max_pmark) < rp->blue_pmark) {
508 /* drop or mark */
509 return (1);
511 /* no drop/mark */
512 return (0);
516 * try to mark CE bit to the packet.
517 * returns 1 if successfully marked, 0 otherwise.
519 static int
520 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
522 struct mbuf *m0;
524 if (pktattr == NULL ||
525 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
526 return (0);
528 /* verify that pattr_hdr is within the mbuf data */
529 for (m0 = m; m0 != NULL; m0 = m0->m_next)
530 if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
531 ((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
532 break;
533 if (m0 == NULL) {
534 /* ick, pattr_hdr is stale */
535 pktattr->pattr_af = AF_UNSPEC;
536 return (0);
539 switch (pktattr->pattr_af) {
540 case AF_INET:
541 if (flags & BLUEF_ECN4) {
542 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
543 u_int8_t otos;
544 int sum;
546 if (ip->ip_v != 4)
547 return (0); /* version mismatch! */
548 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
549 return (0); /* not-ECT */
550 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
551 return (1); /* already marked */
554 * ecn-capable but not marked,
555 * mark CE and update checksum
557 otos = ip->ip_tos;
558 ip->ip_tos |= IPTOS_ECN_CE;
560 * update checksum (from RFC1624)
561 * HC' = ~(~HC + ~m + m')
563 sum = ~ntohs(ip->ip_sum) & 0xffff;
564 sum += (~otos & 0xffff) + ip->ip_tos;
565 sum = (sum >> 16) + (sum & 0xffff);
566 sum += (sum >> 16); /* add carry */
567 ip->ip_sum = htons(~sum & 0xffff);
568 return (1);
570 break;
571 #ifdef INET6
572 case AF_INET6:
573 if (flags & BLUEF_ECN6) {
574 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
575 u_int32_t flowlabel;
577 flowlabel = ntohl(ip6->ip6_flow);
578 if ((flowlabel >> 28) != 6)
579 return (0); /* version mismatch! */
580 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
581 (IPTOS_ECN_NOTECT << 20))
582 return (0); /* not-ECT */
583 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
584 (IPTOS_ECN_CE << 20))
585 return (1); /* already marked */
587 * ecn-capable but not marked, mark CE
589 flowlabel |= (IPTOS_ECN_CE << 20);
590 ip6->ip6_flow = htonl(flowlabel);
591 return (1);
593 break;
594 #endif /* INET6 */
597 /* not marked */
598 return (0);
602 * dequeue routine:
603 * must be called in splnet.
605 * returns: mbuf dequeued.
606 * NULL when no packet is available in the queue.
609 static struct mbuf *
610 blue_dequeue(struct ifaltq * ifq, int op)
612 blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
613 struct mbuf *m = NULL;
615 if (op == ALTDQ_POLL)
616 return (qhead(rqp->rq_q));
618 m = blue_getq(rqp->rq_blue, rqp->rq_q);
619 if (m != NULL)
620 ifq->ifq_len--;
621 return m;
624 struct mbuf *
625 blue_getq(blue_t *rp, class_queue_t *q)
627 struct mbuf *m;
629 if ((m = _getq(q)) == NULL) {
630 if (rp->blue_idle == 0) {
631 rp->blue_idle = 1;
632 microtime(&rp->blue_last);
634 return NULL;
637 rp->blue_idle = 0;
638 #ifdef BLUE_STATS
639 rp->blue_stats.xmit_packets++;
640 rp->blue_stats.xmit_bytes += m->m_pkthdr.len;
641 #endif
642 return (m);
645 static int
646 blue_request(struct ifaltq *ifq, int req, void *arg)
648 blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
650 switch (req) {
651 case ALTRQ_PURGE:
652 _flushq(rqp->rq_q);
653 if (ALTQ_IS_ENABLED(ifq))
654 ifq->ifq_len = 0;
655 break;
657 return (0);
661 #ifdef KLD_MODULE
663 static struct altqsw blue_sw =
664 {"blue", blueopen, blueclose, blueioctl};
666 ALTQ_MODULE(altq_blue, ALTQT_BLUE, &blue_sw);
668 #endif /* KLD_MODULE */
670 #endif /* ALTQ3_COMPAT */
671 #endif /* ALTQ_BLUE */