1 /* $NetBSD: altq_red.c,v 1.27 2008/01/20 18:09:03 joerg Exp $ */
2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.27 2008/01/20 18:09:03 joerg Exp $");
72 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
74 #include <sys/param.h>
75 #include <sys/malloc.h>
77 #include <sys/socket.h>
78 #include <sys/systm.h>
79 #include <sys/errno.h>
80 #include <sys/kauth.h>
81 #if 1 /* ALTQ3_COMPAT */
82 #include <sys/sockio.h>
84 #include <sys/kernel.h>
86 #include <sys/queue.h>
89 #endif /* ALTQ3_COMPAT */
93 #include <netinet/in.h>
94 #include <netinet/in_systm.h>
95 #include <netinet/ip.h>
97 #include <netinet/ip6.h>
101 #include <net/pfvar.h>
103 #include <altq/altq.h>
104 #include <altq/altq_red.h>
106 #include <altq/altq_conf.h>
107 #ifdef ALTQ_FLOWVALVE
108 #include <altq/altq_flowvalve.h>
113 * ALTQ/RED (Random Early Detection) implementation using 32-bit
114 * fixed-point calculation.
116 * written by kjc using the ns code as a reference.
117 * you can learn more about red and ns from Sally's home page at
118 * http://www-nrg.ee.lbl.gov/floyd/
120 * most of the red parameter values are fixed in this implementation
121 * to prevent fixed-point overflow/underflow.
122 * if you change the parameters, watch out for overflow/underflow!
124 * the parameters used are recommended values by Sally.
125 * the corresponding ns config looks:
127 * minthresh=5 maxthresh=15 queue-size=60
130 * bytes=false (can't be handled by 32-bit fixed-point)
131 * doubleq=false dqthresh=false
135 * alternative red parameters for a slow link.
137 * assume the queue length becomes from zero to L and keeps L, it takes
138 * N packets for q_avg to reach 63% of L.
139 * when q_weight is 0.002, N is about 500 packets.
140 * for a slow link like dial-up, 500 packets takes more than 1 minute!
141 * when q_weight is 0.008, N is about 127 packets.
142 * when q_weight is 0.016, N is about 63 packets.
143 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
144 * are allowed for 0.016.
145 * see Sally's paper for more details.
147 /* normal red parameters */
148 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
149 /* q_weight = 0.00195 */
151 /* red parameters for a slow link */
152 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
153 /* q_weight = 0.0078125 */
155 /* red parameters for a very slow link (e.g., dialup) */
156 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
157 /* q_weight = 0.015625 */
159 /* fixed-point uses 12-bit decimal places */
160 #define FP_SHIFT 12 /* fixed-point shift */
162 /* red parameters for drop probability */
163 #define INV_P_MAX 10 /* inverse of max drop probability */
164 #define TH_MIN 5 /* min threshold */
165 #define TH_MAX 15 /* max threshold */
167 #define RED_LIMIT 60 /* default max queue length */
168 #define RED_STATS /* collect statistics */
171 * our default policy for forced-drop is drop-tail.
172 * (in altq-1.1.2 or earlier, the default was random-drop.
173 * but it makes more sense to punish the cause of the surge.)
174 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
178 #ifdef ALTQ_FLOWVALVE
180 * flow-valve is an extension to protect red from unresponsive flows
181 * and to promote end-to-end congestion control.
182 * flow-valve observes the average drop rates of the flows that have
183 * experienced packet drops in the recent past.
184 * when the average drop rate exceeds the threshold, the flow is
185 * blocked by the flow-valve. the trapped flow should back off
186 * exponentially to escape from the flow-valve.
188 #ifdef RED_RANDOM_DROP
189 #error "random-drop can't be used with flow-valve!"
191 #endif /* ALTQ_FLOWVALVE */
193 /* red_list keeps all red_queue_t's allocated. */
194 static red_queue_t
*red_list
= NULL
;
196 #endif /* ALTQ3_COMPAT */
198 /* default red parameter values */
199 static int default_th_min
= TH_MIN
;
200 static int default_th_max
= TH_MAX
;
201 static int default_inv_pmax
= INV_P_MAX
;
204 /* internal function prototypes */
205 static int red_enqueue(struct ifaltq
*, struct mbuf
*, struct altq_pktattr
*);
206 static struct mbuf
*red_dequeue(struct ifaltq
*, int);
207 static int red_request(struct ifaltq
*, int, void *);
208 static void red_purgeq(red_queue_t
*);
209 static int red_detach(red_queue_t
*);
210 #ifdef ALTQ_FLOWVALVE
211 static inline struct fve
*flowlist_lookup(struct flowvalve
*,
212 struct altq_pktattr
*, struct timeval
*);
213 static inline struct fve
*flowlist_reclaim(struct flowvalve
*,
214 struct altq_pktattr
*);
215 static inline void flowlist_move_to_head(struct flowvalve
*, struct fve
*);
216 static inline int fv_p2f(struct flowvalve
*, int);
217 static struct flowvalve
*fv_alloc(struct red
*);
218 static void fv_destroy(struct flowvalve
*);
219 static int fv_checkflow(struct flowvalve
*, struct altq_pktattr
*,
221 static void fv_dropbyred(struct flowvalve
*fv
, struct altq_pktattr
*,
224 #endif /* ALTQ3_COMPAT */
227 * red support routines
230 red_alloc(int weight
, int inv_pmax
, int th_min
, int th_max
, int flags
,
237 rp
= malloc(sizeof(red_t
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
245 rp
->red_weight
= W_WEIGHT
;
247 rp
->red_weight
= weight
;
249 rp
->red_inv_pmax
= default_inv_pmax
;
251 rp
->red_inv_pmax
= inv_pmax
;
253 rp
->red_thmin
= default_th_min
;
255 rp
->red_thmin
= th_min
;
257 rp
->red_thmax
= default_th_max
;
259 rp
->red_thmax
= th_max
;
261 rp
->red_flags
= flags
;
264 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
265 rp
->red_pkttime
= 800;
267 rp
->red_pkttime
= pkttime
;
270 /* when the link is very slow, adjust red parameters */
271 npkts_per_sec
= 1000000 / rp
->red_pkttime
;
272 if (npkts_per_sec
< 50) {
273 /* up to about 400Kbps */
274 rp
->red_weight
= W_WEIGHT_2
;
275 } else if (npkts_per_sec
< 300) {
276 /* up to about 2.4Mbps */
277 rp
->red_weight
= W_WEIGHT_1
;
281 /* calculate wshift. weight must be power of 2 */
283 for (i
= 0; w
> 1; i
++)
286 w
= 1 << rp
->red_wshift
;
287 if (w
!= rp
->red_weight
) {
288 printf("invalid weight value %d for red! use %d\n",
294 * thmin_s and thmax_s are scaled versions of th_min and th_max
295 * to be compared with avg.
297 rp
->red_thmin_s
= rp
->red_thmin
<< (rp
->red_wshift
+ FP_SHIFT
);
298 rp
->red_thmax_s
= rp
->red_thmax
<< (rp
->red_wshift
+ FP_SHIFT
);
301 * precompute probability denominator
302 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
304 rp
->red_probd
= (2 * (rp
->red_thmax
- rp
->red_thmin
)
305 * rp
->red_inv_pmax
) << FP_SHIFT
;
307 /* allocate weight table */
308 rp
->red_wtab
= wtab_alloc(rp
->red_weight
);
310 microtime(&rp
->red_last
);
312 #ifdef ALTQ_FLOWVALVE
313 if (flags
& REDF_FLOWVALVE
)
314 rp
->red_flowvalve
= fv_alloc(rp
);
315 /* if fv_alloc fails, flowvalve is just disabled */
317 #endif /* ALTQ3_COMPAT */
322 red_destroy(red_t
*rp
)
325 #ifdef ALTQ_FLOWVALVE
326 if (rp
->red_flowvalve
!= NULL
)
327 fv_destroy(rp
->red_flowvalve
);
329 #endif /* ALTQ3_COMPAT */
330 wtab_destroy(rp
->red_wtab
);
335 red_getstats(red_t
*rp
, struct redstats
*sp
)
337 sp
->q_avg
= rp
->red_avg
>> rp
->red_wshift
;
338 sp
->xmit_cnt
= rp
->red_stats
.xmit_cnt
;
339 sp
->drop_cnt
= rp
->red_stats
.drop_cnt
;
340 sp
->drop_forced
= rp
->red_stats
.drop_forced
;
341 sp
->drop_unforced
= rp
->red_stats
.drop_unforced
;
342 sp
->marked_packets
= rp
->red_stats
.marked_packets
;
346 red_addq(red_t
*rp
, class_queue_t
*q
, struct mbuf
*m
,
347 struct altq_pktattr
*pktattr
)
352 #ifdef ALTQ_FLOWVALVE
353 struct fve
*fve
= NULL
;
355 if (rp
->red_flowvalve
!= NULL
&& rp
->red_flowvalve
->fv_flows
> 0)
356 if (fv_checkflow(rp
->red_flowvalve
, pktattr
, &fve
)) {
361 #endif /* ALTQ3_COMPAT */
366 * if we were idle, we pretend that n packets arrived during
375 t
= (now
.tv_sec
- rp
->red_last
.tv_sec
);
378 * being idle for more than 1 minute, set avg to zero.
379 * this prevents t from overflow.
383 t
= t
* 1000000 + (now
.tv_usec
- rp
->red_last
.tv_usec
);
384 n
= t
/ rp
->red_pkttime
- 1;
386 /* the following line does (avg = (1 - Wq)^n * avg) */
388 avg
= (avg
>> FP_SHIFT
) *
389 pow_w(rp
->red_wtab
, n
);
393 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
394 avg
+= (qlen(q
) << FP_SHIFT
) - (avg
>> rp
->red_wshift
);
395 rp
->red_avg
= avg
; /* save the new value */
398 * red_count keeps a tally of arriving traffic that has not
403 /* see if we drop early */
404 droptype
= DTYPE_NODROP
;
405 if (avg
>= rp
->red_thmin_s
&& qlen(q
) > 1) {
406 if (avg
>= rp
->red_thmax_s
) {
407 /* avg >= th_max: forced drop */
408 droptype
= DTYPE_FORCED
;
409 } else if (rp
->red_old
== 0) {
410 /* first exceeds th_min */
413 } else if (drop_early((avg
- rp
->red_thmin_s
) >> rp
->red_wshift
,
414 rp
->red_probd
, rp
->red_count
)) {
415 /* mark or drop by red */
416 if ((rp
->red_flags
& REDF_ECN
) &&
417 mark_ecn(m
, pktattr
, rp
->red_flags
)) {
418 /* successfully marked. do not drop. */
421 rp
->red_stats
.marked_packets
++;
424 /* unforced drop by red */
425 droptype
= DTYPE_EARLY
;
434 * if the queue length hits the hard limit, it's a forced drop.
436 if (droptype
== DTYPE_NODROP
&& qlen(q
) >= qlimit(q
))
437 droptype
= DTYPE_FORCED
;
439 #ifdef RED_RANDOM_DROP
440 /* if successful or forced drop, enqueue this packet. */
441 if (droptype
!= DTYPE_EARLY
)
444 /* if successful, enqueue this packet. */
445 if (droptype
== DTYPE_NODROP
)
448 if (droptype
!= DTYPE_NODROP
) {
449 if (droptype
== DTYPE_EARLY
) {
450 /* drop the incoming packet */
452 rp
->red_stats
.drop_unforced
++;
455 /* forced drop, select a victim packet in the queue. */
456 #ifdef RED_RANDOM_DROP
460 rp
->red_stats
.drop_forced
++;
464 PKTCNTR_ADD(&rp
->red_stats
.drop_cnt
, m_pktlen(m
));
468 #ifdef ALTQ_FLOWVALVE
469 if (rp
->red_flowvalve
!= NULL
)
470 fv_dropbyred(rp
->red_flowvalve
, pktattr
, fve
);
472 #endif /* ALTQ3_COMPAT */
476 /* successfully queued */
478 PKTCNTR_ADD(&rp
->red_stats
.xmit_cnt
, m_pktlen(m
));
484 * early-drop probability is calculated as follows:
485 * prob = p_max * (avg - th_min) / (th_max - th_min)
486 * prob_a = prob / (2 - count*prob)
487 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
488 * here prob_a increases as successive undrop count increases.
489 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
490 * becomes 1 when (count >= (2 / prob))).
493 drop_early(int fp_len
, int fp_probd
, int count
)
495 int d
; /* denominator of drop-probability */
497 d
= fp_probd
- count
* fp_len
;
499 /* count exceeds the hard limit: drop or mark */
503 * now the range of d is [1..600] in fixed-point. (when
504 * th_max-th_min=10 and p_max=1/30)
505 * drop probability = (avg - TH_MIN) / d
508 if ((arc4random() % d
) < fp_len
) {
517 * try to mark CE bit to the packet.
518 * returns 1 if successfully marked, 0 otherwise.
521 mark_ecn(struct mbuf
*m
, struct altq_pktattr
*pktattr
, int flags
)
529 t
= m_tag_find(m
, PACKET_TAG_ALTQ_QID
, NULL
);
531 at
= (struct altq_tag
*)(t
+ 1);
537 } else if (pktattr
!= NULL
) {
538 af
= pktattr
->pattr_af
;
539 hdr
= pktattr
->pattr_hdr
;
540 #endif /* ALTQ3_COMPAT */
544 if (af
!= AF_INET
&& af
!= AF_INET6
)
547 /* verify that pattr_hdr is within the mbuf data */
548 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
)
549 if (((char *)hdr
>= m0
->m_data
) &&
550 ((char *)hdr
< m0
->m_data
+ m0
->m_len
))
553 /* ick, tag info is stale */
559 if (flags
& REDF_ECN4
) {
565 return (0); /* version mismatch! */
567 if ((ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_NOTECT
)
568 return (0); /* not-ECT */
569 if ((ip
->ip_tos
& IPTOS_ECN_MASK
) == IPTOS_ECN_CE
)
570 return (1); /* already marked */
573 * ecn-capable but not marked,
574 * mark CE and update checksum
577 ip
->ip_tos
|= IPTOS_ECN_CE
;
579 * update checksum (from RFC1624)
580 * HC' = ~(~HC + ~m + m')
582 sum
= ~ntohs(ip
->ip_sum
) & 0xffff;
583 sum
+= (~otos
& 0xffff) + ip
->ip_tos
;
584 sum
= (sum
>> 16) + (sum
& 0xffff);
585 sum
+= (sum
>> 16); /* add carry */
586 ip
->ip_sum
= htons(~sum
& 0xffff);
592 if (flags
& REDF_ECN6
) {
593 struct ip6_hdr
*ip6
= hdr
;
596 flowlabel
= ntohl(ip6
->ip6_flow
);
597 if ((flowlabel
>> 28) != 6)
598 return (0); /* version mismatch! */
599 if ((flowlabel
& (IPTOS_ECN_MASK
<< 20)) ==
600 (IPTOS_ECN_NOTECT
<< 20))
601 return (0); /* not-ECT */
602 if ((flowlabel
& (IPTOS_ECN_MASK
<< 20)) ==
603 (IPTOS_ECN_CE
<< 20))
604 return (1); /* already marked */
606 * ecn-capable but not marked, mark CE
608 flowlabel
|= (IPTOS_ECN_CE
<< 20);
609 ip6
->ip6_flow
= htonl(flowlabel
);
621 red_getq(red_t
*rp
, class_queue_t
*q
)
625 if ((m
= _getq(q
)) == NULL
) {
626 if (rp
->red_idle
== 0) {
628 microtime(&rp
->red_last
);
638 * helper routine to calibrate avg during idle.
639 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
640 * here Wq = 1/weight and the code assumes Wq is close to zero.
642 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
644 static struct wtab
*wtab_list
= NULL
; /* pointer to wtab list */
647 wtab_alloc(int weight
)
652 for (w
= wtab_list
; w
!= NULL
; w
= w
->w_next
)
653 if (w
->w_weight
== weight
) {
658 w
= malloc(sizeof(struct wtab
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
660 panic("wtab_alloc: malloc failed!");
661 w
->w_weight
= weight
;
663 w
->w_next
= wtab_list
;
666 /* initialize the weight table */
667 w
->w_tab
[0] = ((weight
- 1) << FP_SHIFT
) / weight
;
668 for (i
= 1; i
< 32; i
++) {
669 w
->w_tab
[i
] = (w
->w_tab
[i
-1] * w
->w_tab
[i
-1]) >> FP_SHIFT
;
670 if (w
->w_tab
[i
] == 0 && w
->w_param_max
== 0)
671 w
->w_param_max
= 1 << i
;
678 wtab_destroy(struct wtab
*w
)
682 if (--w
->w_refcount
> 0)
686 wtab_list
= w
->w_next
;
687 else for (prev
= wtab_list
; prev
->w_next
!= NULL
; prev
= prev
->w_next
)
688 if (prev
->w_next
== w
) {
689 prev
->w_next
= w
->w_next
;
698 pow_w(struct wtab
*w
, int n
)
703 if (n
>= w
->w_param_max
)
714 val
= (val
* w
->w_tab
[i
]) >> FP_SHIFT
;
725 * red device interface
730 redopen(dev_t dev
, int flag
, int fmt
,
733 /* everything will be done when the queueing scheme is attached. */
738 redclose(dev_t dev
, int flag
, int fmt
,
744 while ((rqp
= red_list
) != NULL
) {
746 err
= red_detach(rqp
);
747 if (err
!= 0 && error
== 0)
755 redioctl(dev_t dev
, ioctlcmd_t cmd
, void *addr
, int flag
,
759 struct red_interface
*ifacep
;
761 struct proc
*p
= l
->l_proc
;
764 /* check super-user privilege */
769 #if (__FreeBSD_version > 400000)
770 if ((error
= suser(p
)) != 0)
772 if ((error
= kauth_authorize_network(p
->p_cred
,
773 KAUTH_NETWORK_ALTQ
, KAUTH_REQ_NETWORK_ALTQ_RED
, NULL
,
783 ifacep
= (struct red_interface
*)addr
;
784 if ((rqp
= altq_lookup(ifacep
->red_ifname
, ALTQT_RED
)) == NULL
) {
788 error
= altq_enable(rqp
->rq_ifq
);
792 ifacep
= (struct red_interface
*)addr
;
793 if ((rqp
= altq_lookup(ifacep
->red_ifname
, ALTQT_RED
)) == NULL
) {
797 error
= altq_disable(rqp
->rq_ifq
);
801 ifp
= ifunit(((struct red_interface
*)addr
)->red_ifname
);
807 /* allocate and initialize red_queue_t */
808 rqp
= malloc(sizeof(red_queue_t
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
814 rqp
->rq_q
= malloc(sizeof(class_queue_t
), M_DEVBUF
,
816 if (rqp
->rq_q
== NULL
) {
822 rqp
->rq_red
= red_alloc(0, 0, 0, 0, 0, 0);
823 if (rqp
->rq_red
== NULL
) {
824 free(rqp
->rq_q
, M_DEVBUF
);
830 rqp
->rq_ifq
= &ifp
->if_snd
;
831 qtail(rqp
->rq_q
) = NULL
;
833 qlimit(rqp
->rq_q
) = RED_LIMIT
;
834 qtype(rqp
->rq_q
) = Q_RED
;
837 * set RED to this ifnet structure.
839 error
= altq_attach(rqp
->rq_ifq
, ALTQT_RED
, rqp
,
840 red_enqueue
, red_dequeue
, red_request
,
843 red_destroy(rqp
->rq_red
);
844 free(rqp
->rq_q
, M_DEVBUF
);
849 /* add this state to the red list */
850 rqp
->rq_next
= red_list
;
855 ifacep
= (struct red_interface
*)addr
;
856 if ((rqp
= altq_lookup(ifacep
->red_ifname
, ALTQT_RED
)) == NULL
) {
860 error
= red_detach(rqp
);
865 struct red_stats
*q_stats
;
868 q_stats
= (struct red_stats
*)addr
;
869 if ((rqp
= altq_lookup(q_stats
->iface
.red_ifname
,
870 ALTQT_RED
)) == NULL
) {
875 q_stats
->q_len
= qlen(rqp
->rq_q
);
876 q_stats
->q_limit
= qlimit(rqp
->rq_q
);
879 q_stats
->q_avg
= rp
->red_avg
>> rp
->red_wshift
;
880 q_stats
->xmit_cnt
= rp
->red_stats
.xmit_cnt
;
881 q_stats
->drop_cnt
= rp
->red_stats
.drop_cnt
;
882 q_stats
->drop_forced
= rp
->red_stats
.drop_forced
;
883 q_stats
->drop_unforced
= rp
->red_stats
.drop_unforced
;
884 q_stats
->marked_packets
= rp
->red_stats
.marked_packets
;
886 q_stats
->weight
= rp
->red_weight
;
887 q_stats
->inv_pmax
= rp
->red_inv_pmax
;
888 q_stats
->th_min
= rp
->red_thmin
;
889 q_stats
->th_max
= rp
->red_thmax
;
891 #ifdef ALTQ_FLOWVALVE
892 if (rp
->red_flowvalve
!= NULL
) {
893 struct flowvalve
*fv
= rp
->red_flowvalve
;
894 q_stats
->fv_flows
= fv
->fv_flows
;
895 q_stats
->fv_pass
= fv
->fv_stats
.pass
;
896 q_stats
->fv_predrop
= fv
->fv_stats
.predrop
;
897 q_stats
->fv_alloc
= fv
->fv_stats
.alloc
;
898 q_stats
->fv_escape
= fv
->fv_stats
.escape
;
900 #endif /* ALTQ_FLOWVALVE */
901 q_stats
->fv_flows
= 0;
902 q_stats
->fv_pass
= 0;
903 q_stats
->fv_predrop
= 0;
904 q_stats
->fv_alloc
= 0;
905 q_stats
->fv_escape
= 0;
906 #ifdef ALTQ_FLOWVALVE
908 #endif /* ALTQ_FLOWVALVE */
909 } while (/*CONSTCOND*/ 0);
918 fc
= (struct red_conf
*)addr
;
919 if ((rqp
= altq_lookup(fc
->iface
.red_ifname
,
920 ALTQT_RED
)) == NULL
) {
924 new = red_alloc(fc
->red_weight
,
937 limit
= fc
->red_limit
;
938 if (limit
< fc
->red_thmax
)
939 limit
= fc
->red_thmax
;
940 qlimit(rqp
->rq_q
) = limit
;
941 fc
->red_limit
= limit
; /* write back the new value */
943 red_destroy(rqp
->rq_red
);
948 /* write back new values */
949 fc
->red_limit
= limit
;
950 fc
->red_inv_pmax
= rqp
->rq_red
->red_inv_pmax
;
951 fc
->red_thmin
= rqp
->rq_red
->red_thmin
;
952 fc
->red_thmax
= rqp
->rq_red
->red_thmax
;
954 } while (/*CONSTCOND*/ 0);
957 case RED_SETDEFAULTS
:
959 struct redparams
*rp
;
961 rp
= (struct redparams
*)addr
;
963 default_th_min
= rp
->th_min
;
964 default_th_max
= rp
->th_max
;
965 default_inv_pmax
= rp
->inv_pmax
;
966 } while (/*CONSTCOND*/ 0);
977 red_detach(red_queue_t
*rqp
)
982 if (ALTQ_IS_ENABLED(rqp
->rq_ifq
))
983 altq_disable(rqp
->rq_ifq
);
985 if ((error
= altq_detach(rqp
->rq_ifq
)))
989 red_list
= rqp
->rq_next
;
991 for (tmp
= red_list
; tmp
!= NULL
; tmp
= tmp
->rq_next
)
992 if (tmp
->rq_next
== rqp
) {
993 tmp
->rq_next
= rqp
->rq_next
;
997 printf("red_detach: no state found in red_list!\n");
1000 red_destroy(rqp
->rq_red
);
1001 free(rqp
->rq_q
, M_DEVBUF
);
1002 free(rqp
, M_DEVBUF
);
1009 * returns: 0 when successfully queued.
1010 * ENOBUFS when drop occurs.
1013 red_enqueue(struct ifaltq
*ifq
, struct mbuf
*m
, struct altq_pktattr
*pktattr
)
1015 red_queue_t
*rqp
= (red_queue_t
*)ifq
->altq_disc
;
1017 if (red_addq(rqp
->rq_red
, rqp
->rq_q
, m
, pktattr
) < 0)
1025 * must be called in splnet.
1027 * returns: mbuf dequeued.
1028 * NULL when no packet is available in the queue.
1031 static struct mbuf
*
1032 red_dequeue(struct ifaltq
*ifq
, int op
)
1034 red_queue_t
*rqp
= (red_queue_t
*)ifq
->altq_disc
;
1037 if (op
== ALTDQ_POLL
)
1038 return qhead(rqp
->rq_q
);
1040 /* op == ALTDQ_REMOVE */
1041 m
= red_getq(rqp
->rq_red
, rqp
->rq_q
);
1048 red_request(struct ifaltq
*ifq
, int req
, void *arg
)
1050 red_queue_t
*rqp
= (red_queue_t
*)ifq
->altq_disc
;
1061 red_purgeq(red_queue_t
*rqp
)
1064 if (ALTQ_IS_ENABLED(rqp
->rq_ifq
))
1065 rqp
->rq_ifq
->ifq_len
= 0;
1068 #ifdef ALTQ_FLOWVALVE
1070 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1071 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1072 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1073 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1074 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1075 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1077 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1078 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1080 #define FV_N 10 /* update fve_f every FV_N packets */
1082 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1083 #define FV_TTHRESH 3 /* time threshold to delete fve */
1084 #define FV_ALPHA 5 /* extra packet count */
1088 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1091 * Brtt table: 127 entry table to convert drop rate (p) to
1092 * the corresponding bandwidth fraction (f)
1093 * the following equation is implemented to use scaled values,
1094 * fve_p and fve_f, in the fixed point format.
1096 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1097 * f = Brtt(p) / (max_th + alpha)
1099 #define BRTT_SIZE 128
1100 #define BRTT_SHIFT 12
1101 #define BRTT_MASK 0x0007f000
1102 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1104 const int brtt_tab
[BRTT_SIZE
] = {
1105 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1106 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1107 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1108 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1109 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1110 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1111 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1112 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1113 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1114 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1115 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1116 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1117 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1118 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1119 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1120 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1123 static inline struct fve
*
1124 flowlist_lookup(struct flowvalve
*fv
, struct altq_pktattr
*pktattr
,
1125 struct timeval
*now
)
1131 struct ip6_hdr
*ip6
;
1133 struct timeval tthresh
;
1135 if (pktattr
== NULL
)
1138 tthresh
.tv_sec
= now
->tv_sec
- FV_TTHRESH
;
1141 * search the flow list
1143 switch (pktattr
->pattr_af
) {
1145 ip
= (struct ip
*)pktattr
->pattr_hdr
;
1146 TAILQ_FOREACH(fve
, &fv
->fv_flowlist
, fve_lru
){
1147 if (fve
->fve_lastdrop
.tv_sec
== 0)
1149 if (fve
->fve_lastdrop
.tv_sec
< tthresh
.tv_sec
) {
1150 fve
->fve_lastdrop
.tv_sec
= 0;
1153 if (fve
->fve_flow
.flow_af
== AF_INET
&&
1154 fve
->fve_flow
.flow_ip
.ip_src
.s_addr
==
1155 ip
->ip_src
.s_addr
&&
1156 fve
->fve_flow
.flow_ip
.ip_dst
.s_addr
==
1164 ip6
= (struct ip6_hdr
*)pktattr
->pattr_hdr
;
1165 TAILQ_FOREACH(fve
, &fv
->fv_flowlist
, fve_lru
){
1166 if (fve
->fve_lastdrop
.tv_sec
== 0)
1168 if (fve
->fve_lastdrop
.tv_sec
< tthresh
.tv_sec
) {
1169 fve
->fve_lastdrop
.tv_sec
= 0;
1172 if (fve
->fve_flow
.flow_af
== AF_INET6
&&
1173 IN6_ARE_ADDR_EQUAL(&fve
->fve_flow
.flow_ip6
.ip6_src
,
1175 IN6_ARE_ADDR_EQUAL(&fve
->fve_flow
.flow_ip6
.ip6_dst
,
1184 /* unknown protocol. no drop. */
1187 fv
->fv_flows
= flows
; /* save the number of active fve's */
1191 static inline struct fve
*
1192 flowlist_reclaim(struct flowvalve
*fv
, struct altq_pktattr
*pktattr
)
1197 struct ip6_hdr
*ip6
;
1201 * get an entry from the tail of the LRU list.
1203 fve
= TAILQ_LAST(&fv
->fv_flowlist
, fv_flowhead
);
1205 switch (pktattr
->pattr_af
) {
1207 ip
= (struct ip
*)pktattr
->pattr_hdr
;
1208 fve
->fve_flow
.flow_af
= AF_INET
;
1209 fve
->fve_flow
.flow_ip
.ip_src
= ip
->ip_src
;
1210 fve
->fve_flow
.flow_ip
.ip_dst
= ip
->ip_dst
;
1214 ip6
= (struct ip6_hdr
*)pktattr
->pattr_hdr
;
1215 fve
->fve_flow
.flow_af
= AF_INET6
;
1216 fve
->fve_flow
.flow_ip6
.ip6_src
= ip6
->ip6_src
;
1217 fve
->fve_flow
.flow_ip6
.ip6_dst
= ip6
->ip6_dst
;
1222 fve
->fve_state
= Green
;
1225 fve
->fve_ifseq
= fv
->fv_ifseq
- 1;
1230 fv
->fv_stats
.alloc
++;
1236 flowlist_move_to_head(struct flowvalve
*fv
, struct fve
*fve
)
1238 if (TAILQ_FIRST(&fv
->fv_flowlist
) != fve
) {
1239 TAILQ_REMOVE(&fv
->fv_flowlist
, fve
, fve_lru
);
1240 TAILQ_INSERT_HEAD(&fv
->fv_flowlist
, fve
, fve_lru
);
1245 * allocate flowvalve structure
1247 static struct flowvalve
*
1248 fv_alloc(struct red
*rp
)
1250 struct flowvalve
*fv
;
1254 num
= FV_FLOWLISTSIZE
;
1255 fv
= malloc(sizeof(struct flowvalve
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
1259 fv
->fv_fves
= malloc(sizeof(struct fve
) * num
, M_DEVBUF
,
1261 if (fv
->fv_fves
== NULL
) {
1267 TAILQ_INIT(&fv
->fv_flowlist
);
1268 for (i
= 0; i
< num
; i
++) {
1269 fve
= &fv
->fv_fves
[i
];
1270 fve
->fve_lastdrop
.tv_sec
= 0;
1271 TAILQ_INSERT_TAIL(&fv
->fv_flowlist
, fve
, fve_lru
);
1274 /* initialize drop rate threshold in scaled fixed-point */
1275 fv
->fv_pthresh
= (FV_PSCALE(1) << FP_SHIFT
) / rp
->red_inv_pmax
;
1277 /* initialize drop rate to fraction table */
1278 fv
->fv_p2ftab
= malloc(sizeof(int) * BRTT_SIZE
, M_DEVBUF
, M_WAITOK
);
1279 if (fv
->fv_p2ftab
== NULL
) {
1280 free(fv
->fv_fves
, M_DEVBUF
);
1285 * create the p2f table.
1286 * (shift is used to keep the precision)
1288 for (i
= 1; i
< BRTT_SIZE
; i
++) {
1291 f
= brtt_tab
[i
] << 8;
1292 fv
->fv_p2ftab
[i
] = (f
/ (rp
->red_thmax
+ FV_ALPHA
)) >> 8;
1299 fv_destroy(struct flowvalve
*fv
)
1301 free(fv
->fv_p2ftab
, M_DEVBUF
);
1302 free(fv
->fv_fves
, M_DEVBUF
);
1307 fv_p2f(struct flowvalve
*fv
, int p
)
1312 f
= fv
->fv_p2ftab
[BRTT_SIZE
-1];
1313 else if ((val
= (p
& BRTT_MASK
)))
1314 f
= fv
->fv_p2ftab
[(val
>> BRTT_SHIFT
)];
1316 f
= fv
->fv_p2ftab
[1];
1321 * check if an arriving packet should be pre-dropped.
1322 * called from red_addq() when a packet arrives.
1323 * returns 1 when the packet should be pre-dropped.
1324 * should be called in splnet.
1327 fv_checkflow(struct flowvalve
*fv
, struct altq_pktattr
*pktattr
,
1328 struct fve
**fcache
)
1336 if ((fve
= flowlist_lookup(fv
, pktattr
, &now
)) == NULL
)
1337 /* no matching entry in the flowlist */
1342 /* update fraction f for every FV_N packets */
1343 if (++fve
->fve_count
== FV_N
) {
1345 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1348 (FV_N
<< FP_SHIFT
) / (fv
->fv_ifseq
- fve
->fve_ifseq
)
1349 + fve
->fve_f
- FV_FUNSCALE(fve
->fve_f
);
1350 fve
->fve_ifseq
= fv
->fv_ifseq
;
1357 if (fve
->fve_state
== Green
&& fve
->fve_p
> fv
->fv_pthresh
) {
1360 /* calculate a threshold */
1361 fthresh
= fv_p2f(fv
, fve
->fve_p
);
1362 if (fve
->fve_f
> fthresh
)
1363 fve
->fve_state
= Red
;
1366 if (fve
->fve_state
== Red
) {
1370 if (now
.tv_sec
- fve
->fve_lastdrop
.tv_sec
> FV_BACKOFFTHRESH
) {
1371 /* no drop for at least FV_BACKOFFTHRESH sec */
1373 fve
->fve_state
= Green
;
1375 fv
->fv_stats
.escape
++;
1378 /* block this flow */
1379 flowlist_move_to_head(fv
, fve
);
1380 fve
->fve_lastdrop
= now
;
1382 fv
->fv_stats
.predrop
++;
1391 fve
->fve_p
-= FV_PUNSCALE(fve
->fve_p
);
1395 fv
->fv_stats
.pass
++;
1401 * called from red_addq when a packet is dropped by red.
1402 * should be called in splnet.
1405 fv_dropbyred(struct flowvalve
*fv
, struct altq_pktattr
*pktattr
,
1411 if (pktattr
== NULL
)
1416 /* the fve of this packet is already cached */
1418 else if ((fve
= flowlist_lookup(fv
, pktattr
, &now
)) == NULL
)
1419 fve
= flowlist_reclaim(fv
, pktattr
);
1421 flowlist_move_to_head(fv
, fve
);
1424 * update p: the following line cancels the update
1425 * in fv_checkflow() and calculate
1426 * p = Wp + (1 - Wp) * p
1428 fve
->fve_p
= (1 << FP_SHIFT
) + fve
->fve_p
;
1430 fve
->fve_lastdrop
= now
;
1433 #endif /* ALTQ_FLOWVALVE */
1437 static struct altqsw red_sw
=
1438 {"red", redopen
, redclose
, redioctl
};
1440 ALTQ_MODULE(altq_red
, ALTQT_RED
, &red_sw
);
1441 MODULE_VERSION(altq_red
, 1);
1443 #endif /* KLD_MODULE */
1444 #endif /* ALTQ3_COMPAT */
1446 #endif /* ALTQ_RED */