1 /* $NetBSD: altq_blue.c,v 1.21 2006/11/16 01:32:37 christos Exp $ */
2 /* $KAME: altq_blue.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */
5 * Copyright (C) 1997-2002
6 * Sony Computer Science Laboratories Inc. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_blue.c,v 1.21 2006/11/16 01:32:37 christos Exp $");
71 #ifdef ALTQ_BLUE /* blue is enabled by ALTQ_BLUE option in opt_altq.h */
73 #include <sys/param.h>
74 #include <sys/malloc.h>
76 #include <sys/socket.h>
77 #include <sys/sockio.h>
78 #include <sys/systm.h>
80 #include <sys/errno.h>
81 #include <sys/kernel.h>
82 #include <sys/kauth.h>
85 #include <net/if_types.h>
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
90 #include <netinet/ip6.h>
93 #include <altq/altq.h>
94 #include <altq/altq_conf.h>
95 #include <altq/altq_blue.h>
99 * Blue is proposed and implemented by Wu-chang Feng <wuchang@eecs.umich.edu>.
100 * more information on Blue is available from
101 * http://www.eecs.umich.edu/~wuchang/blue/
/*
 * Compile-time tunables and file-scope state of the BLUE discipline.
 */
104 /* fixed-point uses 12-bit decimal places */
105 #define FP_SHIFT 12 /* fixed-point shift */
107 #define BLUE_LIMIT 200 /* default max queue length */
108 #define BLUE_STATS /* collect statistics */
110 /* blue_list keeps all blue_state_t's allocated. */
/*
 * Head of a singly linked list of blue_queue_t, chained through rq_next.
 * Entries are pushed at attach time in blueioctl() and unlinked by
 * blue_detach().
 */
111 static blue_queue_t
*blue_list
= NULL
;
113 /* internal function prototypes */
/* enqueue callback handed to altq_attach() */
114 static int blue_enqueue(struct ifaltq
*, struct mbuf
*, struct altq_pktattr
*);
/* dequeue callback handed to altq_attach(); the int is the ALTDQ_* op */
115 static struct mbuf
*blue_dequeue(struct ifaltq
*, int);
/* random early-drop decision, driven by blue_pmark / blue_max_pmark */
116 static int drop_early(blue_t
*);
/* try to set the ECN CE codepoint; returns 1 when marked, 0 otherwise */
117 static int mark_ecn(struct mbuf
*, struct altq_pktattr
*, int);
/* disable, detach and unlink one blue_queue_t, then release its memory */
118 static int blue_detach(blue_queue_t
*);
/* request callback handed to altq_attach() */
119 static int blue_request(struct ifaltq
*, int, void *);
122 * blue device interface
/*
 * blueopen: open entry point of the blue control device (it is the open
 * slot of blue_sw).
 *
 * Only part of the signature survives in this listing.  According to the
 * inline comment below, no per-open setup is performed because all state
 * is created when the discipline is attached to an interface.
 */
127 blueopen(dev_t dev
, int flag
, int fmt
,
130 /* everything will be done when the queueing scheme is attached. */
/*
 * blueclose: close entry point of the blue control device.
 *
 * Tears down every discipline instance that is still registered by
 * calling blue_detach() on the head of blue_list until the list is empty.
 *
 * NOTE(review): the test "err != 0 && error == 0" suggests that only the
 * first failure is remembered and reported; the assignment and the return
 * statement are not visible in this listing -- confirm.
 */
135 blueclose(dev_t dev
, int flag
, int fmt
,
/* blue_detach() unlinks rqp from blue_list, so re-read the head each pass */
141 while ((rqp
= blue_list
) != NULL
) {
143 err
= blue_detach(rqp
);
144 if (err
!= 0 && error
== 0)
/*
 * blueioctl: ioctl entry point of the blue control device.
 *
 * addr points at the command argument, which the visible code interprets
 * as a struct blue_interface, struct blue_stats or struct blue_conf
 * depending on the command.  The visible fragments perform, in order:
 *   - a privilege check,
 *   - enable  (altq_lookup + altq_enable),
 *   - disable (altq_lookup + altq_disable),
 *   - attach  (ifunit, allocate state, blue_init, altq_attach, link into
 *              blue_list),
 *   - detach  (altq_lookup + blue_detach),
 *   - statistics read-out into struct blue_stats,
 *   - reconfiguration from struct blue_conf.
 *
 * NOTE(review): the switch statement, its case labels, the error paths
 * after failed lookups and the final return are not visible in this
 * listing; the section names above are inferred from the calls that are.
 */
152 blueioctl(dev_t dev
, ioctlcmd_t cmd
, void *addr
, int flag
,
156 struct blue_interface
*ifacep
;
160 /* check super-user privilege */
/*
 * Two alternative checks are visible: suser() under the __FreeBSD_version
 * conditional, and kauth_authorize_network() with the ALTQ/BLUE request.
 * NOTE(review): the matching #else/#endif lines are not visible.
 */
165 #if (__FreeBSD_version > 400000)
166 if ((error
= suser(p
)) != 0)
169 if ((error
= kauth_authorize_network(l
->l_cred
,
170 KAUTH_NETWORK_ALTQ
, KAUTH_REQ_NETWORK_ALTQ_BLUE
, NULL
,
/* ---- enable: find the BLUE instance by interface name, turn ALTQ on ---- */
180 ifacep
= (struct blue_interface
*)addr
;
181 if ((rqp
= altq_lookup(ifacep
->blue_ifname
, ALTQT_BLUE
)) == NULL
) {
185 error
= altq_enable(rqp
->rq_ifq
);
/* ---- disable: same lookup, then turn ALTQ off on that queue ---- */
189 ifacep
= (struct blue_interface
*)addr
;
190 if ((rqp
= altq_lookup(ifacep
->blue_ifname
, ALTQT_BLUE
)) == NULL
) {
194 error
= altq_disable(rqp
->rq_ifq
);
/* ---- attach: resolve the interface, then build a new blue_queue_t ---- */
198 ifp
= ifunit(((struct blue_interface
*)addr
)->blue_ifname
);
204 /* allocate and initialize blue_state_t */
/* the container is zero-filled (M_ZERO) */
205 rqp
= malloc(sizeof(blue_queue_t
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
/* packet queue; the malloc flags are on a line missing from this listing */
211 rqp
->rq_q
= malloc(sizeof(class_queue_t
), M_DEVBUF
,
213 if (rqp
->rq_q
== NULL
) {
/* BLUE state; on failure the queue allocated just above is released */
219 rqp
->rq_blue
= malloc(sizeof(blue_t
), M_DEVBUF
,
221 if (rqp
->rq_blue
== NULL
) {
222 free(rqp
->rq_q
, M_DEVBUF
);
/* bind to the interface send queue and start with an empty queue */
228 rqp
->rq_ifq
= &ifp
->if_snd
;
229 qtail(rqp
->rq_q
) = NULL
;
231 qlimit(rqp
->rq_q
) = BLUE_LIMIT
;
233 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
/* defaults: flags 0, pkttime 800, max_pmark 1000, last argument 50000 */
/* NOTE(review): last argument is presumably blue_hold_time -- confirm */
234 blue_init(rqp
->rq_blue
, 0, 800, 1000, 50000);
237 * set BLUE to this ifnet structure.
239 error
= altq_attach(rqp
->rq_ifq
, ALTQT_BLUE
, rqp
,
240 blue_enqueue
, blue_dequeue
, blue_request
,
/* altq_attach() failure path: release both sub-allocations */
243 free(rqp
->rq_blue
, M_DEVBUF
);
244 free(rqp
->rq_q
, M_DEVBUF
);
249 /* add this state to the blue list */
/* NOTE(review): the store of rqp into blue_list itself is not visible */
250 rqp
->rq_next
= blue_list
;
/* ---- detach: look the instance up and tear it down ---- */
255 ifacep
= (struct blue_interface
*)addr
;
256 if ((rqp
= altq_lookup(ifacep
->blue_ifname
, ALTQT_BLUE
)) == NULL
) {
260 error
= blue_detach(rqp
);
/* ---- statistics: copy queue state and counters out to the caller ---- */
265 struct blue_stats
*q_stats
;
268 q_stats
= (struct blue_stats
*)addr
;
269 if ((rqp
= altq_lookup(q_stats
->iface
.blue_ifname
,
270 ALTQT_BLUE
)) == NULL
) {
275 q_stats
->q_len
= qlen(rqp
->rq_q
);
276 q_stats
->q_limit
= qlimit(rqp
->rq_q
);
/* NOTE(review): rp is assigned on a line missing from this listing; */
/* presumably rp = rqp->rq_blue -- confirm */
279 q_stats
->q_pmark
= rp
->blue_pmark
;
280 q_stats
->xmit_packets
= rp
->blue_stats
.xmit_packets
;
281 q_stats
->xmit_bytes
= rp
->blue_stats
.xmit_bytes
;
282 q_stats
->drop_packets
= rp
->blue_stats
.drop_packets
;
283 q_stats
->drop_bytes
= rp
->blue_stats
.drop_bytes
;
284 q_stats
->drop_forced
= rp
->blue_stats
.drop_forced
;
285 q_stats
->drop_unforced
= rp
->blue_stats
.drop_unforced
;
286 q_stats
->marked_packets
= rp
->blue_stats
.marked_packets
;
288 } while (/*CONSTCOND*/ 0);
/* ---- reconfigure: apply a struct blue_conf to an attached instance ---- */
293 struct blue_conf
*fc
;
296 fc
= (struct blue_conf
*)addr
;
297 if ((rqp
= altq_lookup(fc
->iface
.blue_ifname
,
298 ALTQT_BLUE
)) == NULL
) {
/* the queue limit is taken from the request as visible here */
302 limit
= fc
->blue_limit
;
303 qlimit(rqp
->rq_q
) = limit
;
304 fc
->blue_limit
= limit
; /* write back the new value */
/* the three timing/probability knobs are only replaced when > 0 */
305 if (fc
->blue_pkttime
> 0)
306 rqp
->rq_blue
->blue_pkttime
= fc
->blue_pkttime
;
307 if (fc
->blue_max_pmark
> 0)
308 rqp
->rq_blue
->blue_max_pmark
= fc
->blue_max_pmark
;
309 if (fc
->blue_hold_time
> 0)
310 rqp
->rq_blue
->blue_hold_time
= fc
->blue_hold_time
;
/* flags are always overwritten */
311 rqp
->rq_blue
->blue_flags
= fc
->blue_flags
;
/* re-run blue_init() with the merged settings */
313 blue_init(rqp
->rq_blue
, rqp
->rq_blue
->blue_flags
,
314 rqp
->rq_blue
->blue_pkttime
,
315 rqp
->rq_blue
->blue_max_pmark
,
316 rqp
->rq_blue
->blue_hold_time
);
317 } while (/*CONSTCOND*/ 0);
/*
 * blue_detach: tear down one BLUE instance.
 *
 * rqp: the instance to remove.
 *
 * Visible steps: disable ALTQ on the interface queue if it is enabled,
 * call altq_detach(), unlink rqp from blue_list, and free the rq_q and
 * rq_blue sub-allocations.
 *
 * NOTE(review): the statement taken when altq_detach() fails, the release
 * of rqp itself and the return statements are not visible in this listing.
 */
328 blue_detach(blue_queue_t
*rqp
)
333 if (ALTQ_IS_ENABLED(rqp
->rq_ifq
))
334 altq_disable(rqp
->rq_ifq
);
336 if ((error
= altq_detach(rqp
->rq_ifq
)))
/* unlink: either rqp is the list head ... */
339 if (blue_list
== rqp
)
340 blue_list
= rqp
->rq_next
;
/* ... or find the predecessor whose rq_next is rqp and bypass it */
342 for (tmp
= blue_list
; tmp
!= NULL
; tmp
= tmp
->rq_next
)
343 if (tmp
->rq_next
== rqp
) {
344 tmp
->rq_next
= rqp
->rq_next
;
/* diagnostic for an instance that was not on the list */
/* NOTE(review): the guarding condition is not visible here */
348 printf("blue_detach: no state found in blue_list!\n");
351 free(rqp
->rq_q
, M_DEVBUF
);
352 free(rqp
->rq_blue
, M_DEVBUF
);
358 * blue support routines
/*
 * blue_init: (re)initialise the BLUE parameters held in *rp.
 *
 * rp:             state to initialise.
 * flags:          stored in blue_flags (BLUEF_* bits are tested elsewhere).
 * pkttime:        stored in blue_pkttime; 1000000 is divided by it below
 *                 to obtain a packets-per-second figure.
 * blue_max_pmark: stored in blue_max_pmark.
 * A further parameter is declared on a line missing from this listing;
 * the body stores a value named blue_hold_time into rp->blue_hold_time.
 *
 * Also records the current time in rp->blue_last.
 */
362 blue_init(blue_t
*rp
, int flags
, int pkttime
, int blue_max_pmark
,
368 rp
->blue_flags
= flags
;
369 rp
->blue_pkttime
= pkttime
;
370 rp
->blue_max_pmark
= blue_max_pmark
;
371 rp
->blue_hold_time
= blue_hold_time
;
/* NOTE(review): the condition guarding this store is not visible; */
/* presumably it replaces a zero pkttime so the division below is safe */
373 rp
->blue_pkttime
= 1;
375 /* when the link is very slow, adjust blue parameters */
376 npkts_per_sec
= 1000000 / rp
->blue_pkttime
;
/* the bodies of both slow-link branches are missing from this listing */
377 if (npkts_per_sec
< 50) {
379 else if (npkts_per_sec
< 300) {
/* timestamp used by the hold-time checks in blue_addq() */
382 microtime(&rp
->blue_last
);
389 * returns: 0 when successfully queued.
390 * ENOBUFS when drop occurs.
/*
 * blue_enqueue: enqueue callback registered through altq_attach().
 *
 * ifq:     interface queue; its altq_disc field holds the blue_queue_t.
 * m:       packet to enqueue.
 * pktattr: packet attributes passed through to blue_addq() for ECN marking.
 *
 * Delegates to blue_addq(), which returns 0 when the packet was queued.
 * NOTE(review): the statements executed on success and the return values
 * are not visible in this listing.
 */
393 blue_enqueue(struct ifaltq
*ifq
, struct mbuf
*m
, struct altq_pktattr
*pktattr
)
395 blue_queue_t
*rqp
= (blue_queue_t
*)ifq
->altq_disc
;
398 if (blue_addq(rqp
->rq_blue
, rqp
->rq_q
, m
, pktattr
) == 0)
/* Drop classification computed for each packet by blue_addq(). */
405 #define DTYPE_NODROP 0 /* no drop */
406 #define DTYPE_FORCED 1 /* a "forced" drop */
407 #define DTYPE_EARLY 2 /* an "unforced" (early) drop */
/*
 * blue_addq: apply the BLUE algorithm to one arriving packet.
 *
 * rp:      BLUE state (marking probability, timers, statistics).
 * q:       packet queue.
 * m:       arriving packet.
 * pktattr: packet attributes, forwarded to mark_ecn().
 *
 * Visible logic:
 *   1. Idle handling: the time since blue_last is computed, and when more
 *      than blue_hold_time microseconds have passed blue_pmark is clamped
 *      at 0 and blue_last is refreshed.
 *   2. Early drop: when drop_early() fires and qlen(q) > 1 the packet is
 *      ECN-marked if BLUEF_ECN is set and mark_ecn() succeeds; otherwise
 *      it is classified DTYPE_EARLY.
 *   3. Forced drop: a packet not already dropped is classified
 *      DTYPE_FORCED when qlen(q) >= qlimit(q).
 *   4. On a forced drop, if more than blue_hold_time microseconds have
 *      passed since blue_last, blue_pmark grows by blue_max_pmark >> 3,
 *      is clamped to blue_max_pmark, and blue_last is refreshed.
 *   5. Drop counters in blue_stats are updated.
 *
 * Callers compare the result with 0 for "queued" (see blue_enqueue()).
 * NOTE(review): the enqueue call, the victim selection, the freeing of the
 * dropped mbuf, the decrement of blue_pmark and all return statements are
 * on lines missing from this listing.
 */
410 blue_addq(blue_t
*rp
, class_queue_t
*q
, struct mbuf
*m
,
411 struct altq_pktattr
*pktattr
)
416 * if we were idle, this is an enqueue onto an empty queue
417 * and we should decrement marking probability
/* whole seconds elapsed since the last probability update */
425 t
= (now
.tv_sec
- rp
->blue_last
.tv_sec
);
428 microtime(&rp
->blue_last
);
/* convert to microseconds and add the sub-second part */
430 t
= t
* 1000000 + (now
.tv_usec
- rp
->blue_last
.tv_usec
);
431 if (t
> rp
->blue_hold_time
) {
/* never let the marking probability go negative */
433 if (rp
->blue_pmark
< 0) rp
->blue_pmark
= 0;
434 microtime(&rp
->blue_last
);
439 /* see if we drop early */
440 droptype
= DTYPE_NODROP
;
441 if (drop_early(rp
) && qlen(q
) > 1) {
442 /* mark or drop by blue */
443 if ((rp
->blue_flags
& BLUEF_ECN
) &&
444 mark_ecn(m
, pktattr
, rp
->blue_flags
)) {
445 /* successfully marked. do not drop. */
447 rp
->blue_stats
.marked_packets
++;
450 /* unforced drop by blue */
451 droptype
= DTYPE_EARLY
;
456 * if the queue length hits the hard limit, it's a forced drop.
458 if (droptype
== DTYPE_NODROP
&& qlen(q
) >= qlimit(q
))
459 droptype
= DTYPE_FORCED
;
461 /* if successful or forced drop, enqueue this packet. */
462 if (droptype
!= DTYPE_EARLY
)
465 if (droptype
!= DTYPE_NODROP
) {
466 if (droptype
== DTYPE_EARLY
) {
467 /* drop the incoming packet */
469 rp
->blue_stats
.drop_unforced
++;
474 /* forced drop, select a victim packet in the queue. */
477 t
= (now
.tv_sec
- rp
->blue_last
.tv_sec
);
478 t
= t
* 1000000 + (now
.tv_usec
- rp
->blue_last
.tv_usec
);
479 if (t
> rp
->blue_hold_time
) {
480 rp
->blue_pmark
+= rp
->blue_max_pmark
>> 3;
481 if (rp
->blue_pmark
> rp
->blue_max_pmark
)
482 rp
->blue_pmark
= rp
->blue_max_pmark
;
483 microtime(&rp
->blue_last
);
486 rp
->blue_stats
.drop_forced
++;
490 rp
->blue_stats
.drop_packets
++;
491 rp
->blue_stats
.drop_bytes
+= m
->m_pkthdr
.len
;
496 /* successfully queued */
501 * early-drop probability is kept in blue_pmark
/*
 * drop_early: probabilistic early-drop decision.
 *
 * rp: BLUE state.
 *
 * Draws arc4random() modulo blue_max_pmark and compares the result with
 * blue_pmark, so the branch is taken with probability of roughly
 * blue_pmark / blue_max_pmark.  blue_addq() treats a non-zero result as
 * "mark or drop this packet".
 *
 * NOTE(review): the modulo requires blue_max_pmark != 0.  The visible
 * callers of blue_init() pass 1000 or a value that was only stored when
 * it was > 0 -- confirm no other path can leave it at zero.
 * NOTE(review): both return statements are missing from this listing.
 */
505 drop_early(blue_t
*rp
)
507 if ((arc4random() % rp
->blue_max_pmark
) < rp
->blue_pmark
) {
516 * try to mark CE bit to the packet.
517 * returns 1 if successfully marked, 0 otherwise.
/*
 * mark_ecn: try to set the ECN Congestion Experienced (CE) codepoint.
 *
 * m:       packet, walked as an mbuf chain.
 * pktattr: attributes giving the address family (pattr_af) and a pointer
 *          to the network header (pattr_hdr).
 * flags:   BLUEF_ECN4 / BLUEF_ECN6 select which families may be marked.
 *
 * Returns 1 when the packet carries CE on exit (newly set or already
 * present) and 0 for not-ECT packets or an IP version mismatch.
 *
 * pattr_hdr is first checked to lie inside one of the mbufs of the chain;
 * if it does not, pattr_af is reset to AF_UNSPEC.  For IPv4 the header
 * checksum is updated incrementally after ip_tos is changed.
 *
 * NOTE(review): the case labels, the IPv4 version test, the capture of
 * the old TOS value (otos) and several return statements are on lines
 * missing from this listing.
 */
520 mark_ecn(struct mbuf
*m
, struct altq_pktattr
*pktattr
, int flags
)
/* only IPv4 and IPv6 packets with attributes can be marked */
524 if (pktattr
== NULL
||
525 (pktattr
->pattr_af
!= AF_INET
&& pktattr
->pattr_af
!= AF_INET6
))
528 /* verify that pattr_hdr is within the mbuf data */
529 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
)
530 if (((char *)pktattr
->pattr_hdr
>= m0
->m_data
) &&
531 ((char *)pktattr
->pattr_hdr
< m0
->m_data
+ m0
->m_len
))
534 /* ick, pattr_hdr is stale */
535 pktattr
->pattr_af
= AF_UNSPEC
;
539 switch (pktattr
->pattr_af
) {
/* IPv4: the ECN bits live in ip_tos */
541 if (flags
& BLUEF_ECN4
) {
542 struct ip
*ip
= (struct ip
*)pktattr
->pattr_hdr
;
547 return (0); /* version mismatch! */
548 if ((ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_NOTECT
)
549 return (0); /* not-ECT */
550 if ((ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_CE
)
551 return (1); /* already marked */
554 * ecn-capable but not marked,
555 * mark CE and update checksum
558 ip
->ip_tos
|= IPTOS_ECN_CE
;
560 * update checksum (from RFC1624)
561 * HC' = ~(~HC + ~m + m')
563 sum
= ~ntohs(ip
->ip_sum
) & 0xffff;
564 sum
+= (~otos
& 0xffff) + ip
->ip_tos
;
565 sum
= (sum
>> 16) + (sum
& 0xffff);
566 sum
+= (sum
>> 16); /* add carry */
567 ip
->ip_sum
= htons(~sum
& 0xffff);
/* IPv6: version is the top 4 bits of ip6_flow, ECN sits at bit 20 */
573 if (flags
& BLUEF_ECN6
) {
574 struct ip6_hdr
*ip6
= (struct ip6_hdr
*)pktattr
->pattr_hdr
;
577 flowlabel
= ntohl(ip6
->ip6_flow
);
578 if ((flowlabel
>> 28) != 6)
579 return (0); /* version mismatch! */
580 if ((flowlabel
& (IPTOS_ECN_MASK
<< 20)) ==
581 (IPTOS_ECN_NOTECT
<< 20))
582 return (0); /* not-ECT */
583 if ((flowlabel
& (IPTOS_ECN_MASK
<< 20)) ==
584 (IPTOS_ECN_CE
<< 20))
585 return (1); /* already marked */
587 * ecn-capable but not marked, mark CE
589 flowlabel
|= (IPTOS_ECN_CE
<< 20);
590 ip6
->ip6_flow
= htonl(flowlabel
);
603 * must be called in splnet.
605 * returns: mbuf dequeued.
606 * NULL when no packet is available in the queue.
/*
 * blue_dequeue: dequeue callback registered through altq_attach().
 *
 * ifq: interface queue; its altq_disc field holds the blue_queue_t.
 * op:  ALTDQ_POLL peeks at the head of the queue without removing it;
 *      any other value removes a packet via blue_getq().
 *
 * NOTE(review): the statements after the blue_getq() call and the final
 * return are not visible in this listing.
 */
610 blue_dequeue(struct ifaltq
* ifq
, int op
)
612 blue_queue_t
*rqp
= (blue_queue_t
*)ifq
->altq_disc
;
613 struct mbuf
*m
= NULL
;
615 if (op
== ALTDQ_POLL
)
616 return (qhead(rqp
->rq_q
));
618 m
= blue_getq(rqp
->rq_blue
, rqp
->rq_q
);
/*
 * blue_getq: remove one packet from q and maintain BLUE idle/statistics
 * state.
 *
 * rp: BLUE state.
 * q:  packet queue.
 *
 * When _getq() finds the queue empty and blue_idle is still 0, the current
 * time is stored in blue_last (blue_addq() later measures the idle period
 * from it).  For a dequeued packet the xmit_packets / xmit_bytes counters
 * are incremented.
 *
 * NOTE(review): the store that sets blue_idle and the return statements
 * are not visible in this listing.
 */
625 blue_getq(blue_t
*rp
, class_queue_t
*q
)
629 if ((m
= _getq(q
)) == NULL
) {
630 if (rp
->blue_idle
== 0) {
632 microtime(&rp
->blue_last
);
639 rp
->blue_stats
.xmit_packets
++;
640 rp
->blue_stats
.xmit_bytes
+= m
->m_pkthdr
.len
;
/*
 * blue_request: request callback registered through altq_attach().
 *
 * ifq: interface queue; its altq_disc field holds the blue_queue_t.
 * req: request code.
 * arg: request argument.
 *
 * NOTE(review): only the ALTQ_IS_ENABLED() test is visible; the request
 * dispatch, the action taken and the return value are on lines missing
 * from this listing.
 */
646 blue_request(struct ifaltq
*ifq
, int req
, void *arg
)
648 blue_queue_t
*rqp
= (blue_queue_t
*)ifq
->altq_disc
;
653 if (ALTQ_IS_ENABLED(ifq
))
/*
 * Device switch entry for the discipline: its name followed by the open,
 * close and ioctl entry points defined in this file.
 */
663 static struct altqsw blue_sw
=
664 {"blue", blueopen
, blueclose
, blueioctl
};
/* register blue_sw under the ALTQT_BLUE discipline type */
666 ALTQ_MODULE(altq_blue
, ALTQT_BLUE
, &blue_sw
);
668 #endif /* KLD_MODULE */
670 #endif /* ALTQ3_COMPAT */
671 #endif /* ALTQ_BLUE */