2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/sched.h>
20 #include <linux/string.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
25 #include <linux/errno.h>
26 #include <linux/interrupt.h>
27 #include <linux/if_ether.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/etherdevice.h>
31 #include <linux/notifier.h>
33 #include <net/route.h>
34 #include <linux/skbuff.h>
36 #include <net/pkt_sched.h>
39 /* Class-Based Queueing (CBQ) algorithm.
40 =======================================
42 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43 Management Models for Packet Networks",
44 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
51 [4] Sally Floyd and Michael Speer, "Experimental Results
52 for Class-Based Queueing", 1998, not published.
54 -----------------------------------------------------------------------
56 Algorithm skeleton was taken from NS simulator cbq.cc.
57 If someone wants to check this code against the LBL version,
58 he should take into account that ONLY the skeleton was borrowed,
59 the implementation is different. Particularly:
61 --- The WRR algorithm is different. Our version looks more
62 reasonable (I hope) and works when quanta are allowed to be
63 less than MTU, which is always the case when real time classes
64 have small rates. Note, that the statement of [3] is
65 incomplete, delay may actually be estimated even if class
66 per-round allotment is less than MTU. Namely, if per-round
67 allotment is W*r_i, and r_1+...+r_k = r < 1
69 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71 In the worst case we have IntServ estimate with D = W*r+k*MTU
72 and C = MTU*r. The proof (if correct at all) is trivial.
75 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76 interpret some places, which look like wrong translations
77 from NS. Anyone is advised to find these differences
78 and explain to me, why I am wrong 8).
80 --- Linux has no EOI event, so that we cannot estimate true class
81 idle time. Workaround is to consider the next dequeue event
82 as sign that previous packet is finished. This is wrong because of
83 internal device queueing, but on a permanently loaded link it is true.
84 Moreover, combined with clock integrator, this scheme looks
85 very close to an ideal solution. */
87 struct cbq_sched_data
;
92 struct cbq_class
*next
; /* hash table link */
93 struct cbq_class
*next_alive
; /* next class with backlog in this priority band */
97 unsigned char priority
; /* class priority */
98 unsigned char priority2
; /* priority to be used after overlimit */
99 unsigned char ewma_log
; /* time constant for idle time calculation */
100 unsigned char ovl_strategy
;
101 #ifdef CONFIG_NET_CLS_POLICE
102 unsigned char police
;
107 /* Link-sharing scheduler parameters */
108 long maxidle
; /* Class parameters: see below. */
112 struct qdisc_rate_table
*R_tab
;
114 /* Overlimit strategy parameters */
115 void (*overlimit
)(struct cbq_class
*cl
);
118 /* General scheduler (WRR) parameters */
120 long quantum
; /* Allotment per WRR round */
121 long weight
; /* Relative allotment: see below */
123 struct Qdisc
*qdisc
; /* Ptr to CBQ discipline */
124 struct cbq_class
*split
; /* Ptr to split node */
125 struct cbq_class
*share
; /* Ptr to LS parent in the class tree */
126 struct cbq_class
*tparent
; /* Ptr to tree parent in the class tree */
127 struct cbq_class
*borrow
; /* NULL if class is bandwidth limited;
129 struct cbq_class
*sibling
; /* Sibling chain */
130 struct cbq_class
*children
; /* Pointer to children chain */
132 struct Qdisc
*q
; /* Elementary queueing discipline */
136 unsigned char cpriority
; /* Effective priority */
137 unsigned char delayed
;
138 unsigned char level
; /* level of the class in hierarchy:
139 0 for leaf classes, and maximal
140 level of children + 1 for nodes.
143 psched_time_t last
; /* Last end of service */
144 psched_time_t undertime
;
146 long deficit
; /* Saved deficit for WRR */
147 unsigned long penalized
;
148 struct gnet_stats_basic bstats
;
149 struct gnet_stats_queue qstats
;
150 struct gnet_stats_rate_est rate_est
;
151 spinlock_t
*stats_lock
;
152 struct tc_cbq_xstats xstats
;
154 struct tcf_proto
*filter_list
;
159 struct cbq_class
*defaults
[TC_PRIO_MAX
+1];
162 struct cbq_sched_data
164 struct cbq_class
*classes
[16]; /* Hash table of all classes */
165 int nclasses
[TC_CBQ_MAXPRIO
+1];
166 unsigned quanta
[TC_CBQ_MAXPRIO
+1];
168 struct cbq_class link
;
171 struct cbq_class
*active
[TC_CBQ_MAXPRIO
+1]; /* List of all classes
174 #ifdef CONFIG_NET_CLS_POLICE
175 struct cbq_class
*rx_class
;
177 struct cbq_class
*tx_class
;
178 struct cbq_class
*tx_borrowed
;
180 psched_time_t now
; /* Cached timestamp */
181 psched_time_t now_rt
; /* Cached real time */
184 struct timer_list delay_timer
;
185 struct timer_list wd_timer
; /* Watchdog timer,
195 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
198 static __inline__
unsigned cbq_hash(u32 h
)
205 static __inline__
struct cbq_class
*
206 cbq_class_lookup(struct cbq_sched_data
*q
, u32 classid
)
208 struct cbq_class
*cl
;
210 for (cl
= q
->classes
[cbq_hash(classid
)]; cl
; cl
= cl
->next
)
211 if (cl
->classid
== classid
)
216 #ifdef CONFIG_NET_CLS_POLICE
218 static struct cbq_class
*
219 cbq_reclassify(struct sk_buff
*skb
, struct cbq_class
*this)
221 struct cbq_class
*cl
, *new;
223 for (cl
= this->tparent
; cl
; cl
= cl
->tparent
)
224 if ((new = cl
->defaults
[TC_PRIO_BESTEFFORT
]) != NULL
&& new != this)
232 /* Classify packet. The procedure is pretty complicated, but
233 it allows us to combine link sharing and priority scheduling
236 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
237 so that it resolves to split nodes. Then packets are classified
238 by logical priority, or a more specific classifier may be attached
242 static struct cbq_class
*
243 cbq_classify(struct sk_buff
*skb
, struct Qdisc
*sch
, int *qerr
)
245 struct cbq_sched_data
*q
= qdisc_priv(sch
);
246 struct cbq_class
*head
= &q
->link
;
247 struct cbq_class
**defmap
;
248 struct cbq_class
*cl
= NULL
;
249 u32 prio
= skb
->priority
;
250 struct tcf_result res
;
253 * Step 1. If skb->priority points to one of our classes, use it.
255 if (TC_H_MAJ(prio
^sch
->handle
) == 0 &&
256 (cl
= cbq_class_lookup(q
, prio
)) != NULL
)
259 *qerr
= NET_XMIT_BYPASS
;
262 defmap
= head
->defaults
;
265 * Step 2+n. Apply classifier.
267 if (!head
->filter_list
|| (result
= tc_classify(skb
, head
->filter_list
, &res
)) < 0)
270 if ((cl
= (void*)res
.class) == NULL
) {
271 if (TC_H_MAJ(res
.classid
))
272 cl
= cbq_class_lookup(q
, res
.classid
);
273 else if ((cl
= defmap
[res
.classid
&TC_PRIO_MAX
]) == NULL
)
274 cl
= defmap
[TC_PRIO_BESTEFFORT
];
276 if (cl
== NULL
|| cl
->level
>= head
->level
)
280 #ifdef CONFIG_NET_CLS_ACT
284 *qerr
= NET_XMIT_SUCCESS
;
288 #elif defined(CONFIG_NET_CLS_POLICE)
290 case TC_POLICE_RECLASSIFY
:
291 return cbq_reclassify(skb
, cl
);
302 * Step 3+n. If classifier selected a link sharing class,
303 * apply agency specific classifier.
304 * Repeat this procdure until we hit a leaf node.
313 * Step 4. No success...
315 if (TC_H_MAJ(prio
) == 0 &&
316 !(cl
= head
->defaults
[prio
&TC_PRIO_MAX
]) &&
317 !(cl
= head
->defaults
[TC_PRIO_BESTEFFORT
]))
324 A packet has just been enqueued on the empty class.
325 cbq_activate_class adds it to the tail of active class list
326 of its priority band.
329 static __inline__
void cbq_activate_class(struct cbq_class
*cl
)
331 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
332 int prio
= cl
->cpriority
;
333 struct cbq_class
*cl_tail
;
335 cl_tail
= q
->active
[prio
];
336 q
->active
[prio
] = cl
;
338 if (cl_tail
!= NULL
) {
339 cl
->next_alive
= cl_tail
->next_alive
;
340 cl_tail
->next_alive
= cl
;
343 q
->activemask
|= (1<<prio
);
348 Unlink class from active chain.
349 Note that this same procedure is done directly in cbq_dequeue*
350 during round-robin procedure.
353 static void cbq_deactivate_class(struct cbq_class
*this)
355 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
356 int prio
= this->cpriority
;
357 struct cbq_class
*cl
;
358 struct cbq_class
*cl_prev
= q
->active
[prio
];
361 cl
= cl_prev
->next_alive
;
363 cl_prev
->next_alive
= cl
->next_alive
;
364 cl
->next_alive
= NULL
;
366 if (cl
== q
->active
[prio
]) {
367 q
->active
[prio
] = cl_prev
;
368 if (cl
== q
->active
[prio
]) {
369 q
->active
[prio
] = NULL
;
370 q
->activemask
&= ~(1<<prio
);
375 cl
= cl_prev
->next_alive
;
378 } while ((cl_prev
= cl
) != q
->active
[prio
]);
382 cbq_mark_toplevel(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
384 int toplevel
= q
->toplevel
;
386 if (toplevel
> cl
->level
&& !(cl
->q
->flags
&TCQ_F_THROTTLED
)) {
390 PSCHED_GET_TIME(now
);
391 incr
= PSCHED_TDIFF(now
, q
->now_rt
);
392 PSCHED_TADD2(q
->now
, incr
, now
);
395 if (PSCHED_TLESS(cl
->undertime
, now
)) {
396 q
->toplevel
= cl
->level
;
399 } while ((cl
=cl
->borrow
) != NULL
&& toplevel
> cl
->level
);
404 cbq_enqueue(struct sk_buff
*skb
, struct Qdisc
*sch
)
406 struct cbq_sched_data
*q
= qdisc_priv(sch
);
409 struct cbq_class
*cl
= cbq_classify(skb
, sch
, &ret
);
411 #ifdef CONFIG_NET_CLS_POLICE
415 if (ret
== NET_XMIT_BYPASS
)
421 #ifdef CONFIG_NET_CLS_POLICE
422 cl
->q
->__parent
= sch
;
424 if ((ret
= cl
->q
->enqueue(skb
, cl
->q
)) == NET_XMIT_SUCCESS
) {
426 sch
->bstats
.packets
++;
427 sch
->bstats
.bytes
+=len
;
428 cbq_mark_toplevel(q
, cl
);
430 cbq_activate_class(cl
);
435 cbq_mark_toplevel(q
, cl
);
441 cbq_requeue(struct sk_buff
*skb
, struct Qdisc
*sch
)
443 struct cbq_sched_data
*q
= qdisc_priv(sch
);
444 struct cbq_class
*cl
;
447 if ((cl
= q
->tx_class
) == NULL
) {
454 cbq_mark_toplevel(q
, cl
);
456 #ifdef CONFIG_NET_CLS_POLICE
458 cl
->q
->__parent
= sch
;
460 if ((ret
= cl
->q
->ops
->requeue(skb
, cl
->q
)) == 0) {
462 sch
->qstats
.requeues
++;
464 cbq_activate_class(cl
);
472 /* Overlimit actions */
474 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
476 static void cbq_ovl_classic(struct cbq_class
*cl
)
478 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
479 psched_tdiff_t delay
= PSCHED_TDIFF(cl
->undertime
, q
->now
);
482 delay
+= cl
->offtime
;
485 Class goes to sleep, so that it will have no
486 chance to work avgidle. Let's forgive it 8)
488 BTW cbq-2.0 has a crap in this
489 place, apparently they forgot to shift it by cl->ewma_log.
492 delay
-= (-cl
->avgidle
) - ((-cl
->avgidle
) >> cl
->ewma_log
);
493 if (cl
->avgidle
< cl
->minidle
)
494 cl
->avgidle
= cl
->minidle
;
497 PSCHED_TADD2(q
->now
, delay
, cl
->undertime
);
499 cl
->xstats
.overactions
++;
502 if (q
->wd_expires
== 0 || q
->wd_expires
> delay
)
503 q
->wd_expires
= delay
;
505 /* Dirty work! We must schedule wakeups based on
506 real available rate, rather than leaf rate,
507 which may be tiny (even zero).
509 if (q
->toplevel
== TC_CBQ_MAXLEVEL
) {
511 psched_tdiff_t base_delay
= q
->wd_expires
;
513 for (b
= cl
->borrow
; b
; b
= b
->borrow
) {
514 delay
= PSCHED_TDIFF(b
->undertime
, q
->now
);
515 if (delay
< base_delay
) {
522 q
->wd_expires
= base_delay
;
526 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
530 static void cbq_ovl_rclassic(struct cbq_class
*cl
)
532 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
533 struct cbq_class
*this = cl
;
536 if (cl
->level
> q
->toplevel
) {
540 } while ((cl
= cl
->borrow
) != NULL
);
547 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
549 static void cbq_ovl_delay(struct cbq_class
*cl
)
551 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
552 psched_tdiff_t delay
= PSCHED_TDIFF(cl
->undertime
, q
->now
);
555 unsigned long sched
= jiffies
;
557 delay
+= cl
->offtime
;
559 delay
-= (-cl
->avgidle
) - ((-cl
->avgidle
) >> cl
->ewma_log
);
560 if (cl
->avgidle
< cl
->minidle
)
561 cl
->avgidle
= cl
->minidle
;
562 PSCHED_TADD2(q
->now
, delay
, cl
->undertime
);
565 sched
+= PSCHED_US2JIFFIE(delay
) + cl
->penalty
;
566 cl
->penalized
= sched
;
567 cl
->cpriority
= TC_CBQ_MAXPRIO
;
568 q
->pmask
|= (1<<TC_CBQ_MAXPRIO
);
569 if (del_timer(&q
->delay_timer
) &&
570 (long)(q
->delay_timer
.expires
- sched
) > 0)
571 q
->delay_timer
.expires
= sched
;
572 add_timer(&q
->delay_timer
);
574 cl
->xstats
.overactions
++;
579 if (q
->wd_expires
== 0 || q
->wd_expires
> delay
)
580 q
->wd_expires
= delay
;
583 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
585 static void cbq_ovl_lowprio(struct cbq_class
*cl
)
587 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
589 cl
->penalized
= jiffies
+ cl
->penalty
;
591 if (cl
->cpriority
!= cl
->priority2
) {
592 cl
->cpriority
= cl
->priority2
;
593 q
->pmask
|= (1<<cl
->cpriority
);
594 cl
->xstats
.overactions
++;
599 /* TC_CBQ_OVL_DROP: penalize class by dropping */
601 static void cbq_ovl_drop(struct cbq_class
*cl
)
603 if (cl
->q
->ops
->drop
)
604 if (cl
->q
->ops
->drop(cl
->q
))
606 cl
->xstats
.overactions
++;
610 static void cbq_watchdog(unsigned long arg
)
612 struct Qdisc
*sch
= (struct Qdisc
*)arg
;
614 sch
->flags
&= ~TCQ_F_THROTTLED
;
615 netif_schedule(sch
->dev
);
618 static unsigned long cbq_undelay_prio(struct cbq_sched_data
*q
, int prio
)
620 struct cbq_class
*cl
;
621 struct cbq_class
*cl_prev
= q
->active
[prio
];
622 unsigned long now
= jiffies
;
623 unsigned long sched
= now
;
629 cl
= cl_prev
->next_alive
;
630 if ((long)(now
- cl
->penalized
) > 0) {
631 cl_prev
->next_alive
= cl
->next_alive
;
632 cl
->next_alive
= NULL
;
633 cl
->cpriority
= cl
->priority
;
635 cbq_activate_class(cl
);
637 if (cl
== q
->active
[prio
]) {
638 q
->active
[prio
] = cl_prev
;
639 if (cl
== q
->active
[prio
]) {
640 q
->active
[prio
] = NULL
;
645 cl
= cl_prev
->next_alive
;
646 } else if ((long)(sched
- cl
->penalized
) > 0)
647 sched
= cl
->penalized
;
648 } while ((cl_prev
= cl
) != q
->active
[prio
]);
650 return (long)(sched
- now
);
653 static void cbq_undelay(unsigned long arg
)
655 struct Qdisc
*sch
= (struct Qdisc
*)arg
;
656 struct cbq_sched_data
*q
= qdisc_priv(sch
);
664 int prio
= ffz(~pmask
);
669 tmp
= cbq_undelay_prio(q
, prio
);
672 if (tmp
< delay
|| delay
== 0)
678 q
->delay_timer
.expires
= jiffies
+ delay
;
679 add_timer(&q
->delay_timer
);
682 sch
->flags
&= ~TCQ_F_THROTTLED
;
683 netif_schedule(sch
->dev
);
687 #ifdef CONFIG_NET_CLS_POLICE
689 static int cbq_reshape_fail(struct sk_buff
*skb
, struct Qdisc
*child
)
692 struct Qdisc
*sch
= child
->__parent
;
693 struct cbq_sched_data
*q
= qdisc_priv(sch
);
694 struct cbq_class
*cl
= q
->rx_class
;
698 if (cl
&& (cl
= cbq_reclassify(skb
, cl
)) != NULL
) {
700 cbq_mark_toplevel(q
, cl
);
703 cl
->q
->__parent
= sch
;
705 if (cl
->q
->enqueue(skb
, cl
->q
) == 0) {
707 sch
->bstats
.packets
++;
708 sch
->bstats
.bytes
+=len
;
710 cbq_activate_class(cl
);
723 It is mission critical procedure.
725 We "regenerate" toplevel cutoff, if transmitting class
726 has backlog and it is not regulated. It is not part of
727 original CBQ description, but looks more reasonable.
728 Probably, it is wrong. This question needs further investigation.
731 static __inline__
void
732 cbq_update_toplevel(struct cbq_sched_data
*q
, struct cbq_class
*cl
,
733 struct cbq_class
*borrowed
)
735 if (cl
&& q
->toplevel
>= borrowed
->level
) {
736 if (cl
->q
->q
.qlen
> 1) {
738 if (PSCHED_IS_PASTPERFECT(borrowed
->undertime
)) {
739 q
->toplevel
= borrowed
->level
;
742 } while ((borrowed
=borrowed
->borrow
) != NULL
);
745 /* It is not necessary now. Uncommenting it
746 will save CPU cycles, but decrease fairness.
748 q
->toplevel
= TC_CBQ_MAXLEVEL
;
754 cbq_update(struct cbq_sched_data
*q
)
756 struct cbq_class
*this = q
->tx_class
;
757 struct cbq_class
*cl
= this;
762 for ( ; cl
; cl
= cl
->share
) {
763 long avgidle
= cl
->avgidle
;
766 cl
->bstats
.packets
++;
767 cl
->bstats
.bytes
+= len
;
770 (now - last) is total time between packet right edges.
771 (last_pktlen/rate) is "virtual" busy time, so that
773 idle = (now - last) - last_pktlen/rate
776 idle
= PSCHED_TDIFF(q
->now
, cl
->last
);
777 if ((unsigned long)idle
> 128*1024*1024) {
778 avgidle
= cl
->maxidle
;
780 idle
-= L2T(cl
, len
);
782 /* true_avgidle := (1-W)*true_avgidle + W*idle,
783 where W=2^{-ewma_log}. But cl->avgidle is scaled:
784 cl->avgidle == true_avgidle/W,
787 avgidle
+= idle
- (avgidle
>>cl
->ewma_log
);
791 /* Overlimit or at-limit */
793 if (avgidle
< cl
->minidle
)
794 avgidle
= cl
->minidle
;
796 cl
->avgidle
= avgidle
;
798 /* Calculate expected time, when this class
799 will be allowed to send.
801 (1-W)*true_avgidle + W*delay = 0, i.e.
802 idle = (1/W - 1)*(-true_avgidle)
804 idle = (1 - W)*(-cl->avgidle);
806 idle
= (-avgidle
) - ((-avgidle
) >> cl
->ewma_log
);
810 To maintain the rate allocated to the class,
811 we add to undertime virtual clock,
812 necessary to complete transmitted packet.
813 (len/phys_bandwidth has been already passed
814 to the moment of cbq_update)
817 idle
-= L2T(&q
->link
, len
);
818 idle
+= L2T(cl
, len
);
820 PSCHED_AUDIT_TDIFF(idle
);
822 PSCHED_TADD2(q
->now
, idle
, cl
->undertime
);
826 PSCHED_SET_PASTPERFECT(cl
->undertime
);
827 if (avgidle
> cl
->maxidle
)
828 cl
->avgidle
= cl
->maxidle
;
830 cl
->avgidle
= avgidle
;
835 cbq_update_toplevel(q
, this, q
->tx_borrowed
);
838 static __inline__
struct cbq_class
*
839 cbq_under_limit(struct cbq_class
*cl
)
841 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
842 struct cbq_class
*this_cl
= cl
;
844 if (cl
->tparent
== NULL
)
847 if (PSCHED_IS_PASTPERFECT(cl
->undertime
) ||
848 !PSCHED_TLESS(q
->now
, cl
->undertime
)) {
854 /* It is very suspicious place. Now overlimit
855 action is generated for not bounded classes
856 only if link is completely congested.
857 Though it is in agree with ancestor-only paradigm,
858 it looks very stupid. Particularly,
859 it means that this chunk of code will either
860 never be called or result in strong amplification
861 of burstiness. Dangerous, silly, and, however,
862 no another solution exists.
864 if ((cl
= cl
->borrow
) == NULL
) {
865 this_cl
->qstats
.overlimits
++;
866 this_cl
->overlimit(this_cl
);
869 if (cl
->level
> q
->toplevel
)
871 } while (!PSCHED_IS_PASTPERFECT(cl
->undertime
) &&
872 PSCHED_TLESS(q
->now
, cl
->undertime
));
878 static __inline__
struct sk_buff
*
879 cbq_dequeue_prio(struct Qdisc
*sch
, int prio
)
881 struct cbq_sched_data
*q
= qdisc_priv(sch
);
882 struct cbq_class
*cl_tail
, *cl_prev
, *cl
;
886 cl_tail
= cl_prev
= q
->active
[prio
];
887 cl
= cl_prev
->next_alive
;
894 struct cbq_class
*borrow
= cl
;
897 (borrow
= cbq_under_limit(cl
)) == NULL
)
900 if (cl
->deficit
<= 0) {
901 /* Class exhausted its allotment per
902 this round. Switch to the next one.
905 cl
->deficit
+= cl
->quantum
;
909 skb
= cl
->q
->dequeue(cl
->q
);
911 /* Class did not give us any skb :-(
912 It could occur even if cl->q->q.qlen != 0
913 f.e. if cl->q == "tbf"
918 cl
->deficit
-= skb
->len
;
920 q
->tx_borrowed
= borrow
;
922 #ifndef CBQ_XSTATS_BORROWS_BYTES
923 borrow
->xstats
.borrows
++;
924 cl
->xstats
.borrows
++;
926 borrow
->xstats
.borrows
+= skb
->len
;
927 cl
->xstats
.borrows
+= skb
->len
;
930 q
->tx_len
= skb
->len
;
932 if (cl
->deficit
<= 0) {
933 q
->active
[prio
] = cl
;
935 cl
->deficit
+= cl
->quantum
;
940 if (cl
->q
->q
.qlen
== 0 || prio
!= cl
->cpriority
) {
941 /* Class is empty or penalized.
942 Unlink it from active chain.
944 cl_prev
->next_alive
= cl
->next_alive
;
945 cl
->next_alive
= NULL
;
947 /* Did cl_tail point to it? */
952 /* Was it the last class in this band? */
955 q
->active
[prio
] = NULL
;
956 q
->activemask
&= ~(1<<prio
);
958 cbq_activate_class(cl
);
962 q
->active
[prio
] = cl_tail
;
965 cbq_activate_class(cl
);
973 } while (cl_prev
!= cl_tail
);
976 q
->active
[prio
] = cl_prev
;
981 static __inline__
struct sk_buff
*
982 cbq_dequeue_1(struct Qdisc
*sch
)
984 struct cbq_sched_data
*q
= qdisc_priv(sch
);
988 activemask
= q
->activemask
&0xFF;
990 int prio
= ffz(~activemask
);
991 activemask
&= ~(1<<prio
);
992 skb
= cbq_dequeue_prio(sch
, prio
);
999 static struct sk_buff
*
1000 cbq_dequeue(struct Qdisc
*sch
)
1002 struct sk_buff
*skb
;
1003 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1005 psched_tdiff_t incr
;
1007 PSCHED_GET_TIME(now
);
1008 incr
= PSCHED_TDIFF(now
, q
->now_rt
);
1011 psched_tdiff_t incr2
;
1012 /* Time integrator. We calculate EOS time
1013 by adding expected packet transmission time.
1014 If real time is greater, we warp artificial clock,
1017 cbq_time = max(real_time, work);
1019 incr2
= L2T(&q
->link
, q
->tx_len
);
1020 PSCHED_TADD(q
->now
, incr2
);
1022 if ((incr
-= incr2
) < 0)
1025 PSCHED_TADD(q
->now
, incr
);
1031 skb
= cbq_dequeue_1(sch
);
1034 sch
->flags
&= ~TCQ_F_THROTTLED
;
1038 /* All the classes are overlimit.
1042 1. Scheduler is empty.
1043 2. Toplevel cutoff inhibited borrowing.
1044 3. Root class is overlimit.
1046 Reset 2d and 3d conditions and retry.
1048 Note, that NS and cbq-2.0 are buggy, peeking
1049 an arbitrary class is appropriate for ancestor-only
1050 sharing, but not for toplevel algorithm.
1052 Our version is better, but slower, because it requires
1053 two passes, but it is unavoidable with top-level sharing.
1056 if (q
->toplevel
== TC_CBQ_MAXLEVEL
&&
1057 PSCHED_IS_PASTPERFECT(q
->link
.undertime
))
1060 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1061 PSCHED_SET_PASTPERFECT(q
->link
.undertime
);
1064 /* No packets in scheduler or nobody wants to give them to us :-(
1065 Sigh... start watchdog timer in the last case. */
1068 sch
->qstats
.overlimits
++;
1069 if (q
->wd_expires
) {
1070 long delay
= PSCHED_US2JIFFIE(q
->wd_expires
);
1073 mod_timer(&q
->wd_timer
, jiffies
+ delay
);
1074 sch
->flags
|= TCQ_F_THROTTLED
;
1080 /* CBQ class maintanance routines */
1082 static void cbq_adjust_levels(struct cbq_class
*this)
1089 struct cbq_class
*cl
;
1091 if ((cl
= this->children
) != NULL
) {
1093 if (cl
->level
> level
)
1095 } while ((cl
= cl
->sibling
) != this->children
);
1097 this->level
= level
+1;
1098 } while ((this = this->tparent
) != NULL
);
1101 static void cbq_normalize_quanta(struct cbq_sched_data
*q
, int prio
)
1103 struct cbq_class
*cl
;
1106 if (q
->quanta
[prio
] == 0)
1109 for (h
=0; h
<16; h
++) {
1110 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
1111 /* BUGGGG... Beware! This expression suffer of
1112 arithmetic overflows!
1114 if (cl
->priority
== prio
) {
1115 cl
->quantum
= (cl
->weight
*cl
->allot
*q
->nclasses
[prio
])/
1118 if (cl
->quantum
<= 0 || cl
->quantum
>32*cl
->qdisc
->dev
->mtu
) {
1119 printk(KERN_WARNING
"CBQ: class %08x has bad quantum==%ld, repaired.\n", cl
->classid
, cl
->quantum
);
1120 cl
->quantum
= cl
->qdisc
->dev
->mtu
/2 + 1;
1126 static void cbq_sync_defmap(struct cbq_class
*cl
)
1128 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
1129 struct cbq_class
*split
= cl
->split
;
1136 for (i
=0; i
<=TC_PRIO_MAX
; i
++) {
1137 if (split
->defaults
[i
] == cl
&& !(cl
->defmap
&(1<<i
)))
1138 split
->defaults
[i
] = NULL
;
1141 for (i
=0; i
<=TC_PRIO_MAX
; i
++) {
1142 int level
= split
->level
;
1144 if (split
->defaults
[i
])
1147 for (h
=0; h
<16; h
++) {
1148 struct cbq_class
*c
;
1150 for (c
= q
->classes
[h
]; c
; c
= c
->next
) {
1151 if (c
->split
== split
&& c
->level
< level
&&
1153 split
->defaults
[i
] = c
;
1161 static void cbq_change_defmap(struct cbq_class
*cl
, u32 splitid
, u32 def
, u32 mask
)
1163 struct cbq_class
*split
= NULL
;
1166 if ((split
= cl
->split
) == NULL
)
1168 splitid
= split
->classid
;
1171 if (split
== NULL
|| split
->classid
!= splitid
) {
1172 for (split
= cl
->tparent
; split
; split
= split
->tparent
)
1173 if (split
->classid
== splitid
)
1180 if (cl
->split
!= split
) {
1182 cbq_sync_defmap(cl
);
1184 cl
->defmap
= def
&mask
;
1186 cl
->defmap
= (cl
->defmap
&~mask
)|(def
&mask
);
1188 cbq_sync_defmap(cl
);
1191 static void cbq_unlink_class(struct cbq_class
*this)
1193 struct cbq_class
*cl
, **clp
;
1194 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
1196 for (clp
= &q
->classes
[cbq_hash(this->classid
)]; (cl
= *clp
) != NULL
; clp
= &cl
->next
) {
1204 if (this->tparent
) {
1213 } while ((cl
= *clp
) != this->sibling
);
1215 if (this->tparent
->children
== this) {
1216 this->tparent
->children
= this->sibling
;
1217 if (this->sibling
== this)
1218 this->tparent
->children
= NULL
;
1221 BUG_TRAP(this->sibling
== this);
1225 static void cbq_link_class(struct cbq_class
*this)
1227 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
1228 unsigned h
= cbq_hash(this->classid
);
1229 struct cbq_class
*parent
= this->tparent
;
1231 this->sibling
= this;
1232 this->next
= q
->classes
[h
];
1233 q
->classes
[h
] = this;
1238 if (parent
->children
== NULL
) {
1239 parent
->children
= this;
1241 this->sibling
= parent
->children
->sibling
;
1242 parent
->children
->sibling
= this;
1246 static unsigned int cbq_drop(struct Qdisc
* sch
)
1248 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1249 struct cbq_class
*cl
, *cl_head
;
1253 for (prio
= TC_CBQ_MAXPRIO
; prio
>= 0; prio
--) {
1254 if ((cl_head
= q
->active
[prio
]) == NULL
)
1259 if (cl
->q
->ops
->drop
&& (len
= cl
->q
->ops
->drop(cl
->q
))) {
1263 } while ((cl
= cl
->next_alive
) != cl_head
);
1269 cbq_reset(struct Qdisc
* sch
)
1271 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1272 struct cbq_class
*cl
;
1279 q
->tx_borrowed
= NULL
;
1280 del_timer(&q
->wd_timer
);
1281 del_timer(&q
->delay_timer
);
1282 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1283 PSCHED_GET_TIME(q
->now
);
1286 for (prio
= 0; prio
<= TC_CBQ_MAXPRIO
; prio
++)
1287 q
->active
[prio
] = NULL
;
1289 for (h
= 0; h
< 16; h
++) {
1290 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
1293 cl
->next_alive
= NULL
;
1294 PSCHED_SET_PASTPERFECT(cl
->undertime
);
1295 cl
->avgidle
= cl
->maxidle
;
1296 cl
->deficit
= cl
->quantum
;
1297 cl
->cpriority
= cl
->priority
;
1304 static int cbq_set_lss(struct cbq_class
*cl
, struct tc_cbq_lssopt
*lss
)
1306 if (lss
->change
&TCF_CBQ_LSS_FLAGS
) {
1307 cl
->share
= (lss
->flags
&TCF_CBQ_LSS_ISOLATED
) ? NULL
: cl
->tparent
;
1308 cl
->borrow
= (lss
->flags
&TCF_CBQ_LSS_BOUNDED
) ? NULL
: cl
->tparent
;
1310 if (lss
->change
&TCF_CBQ_LSS_EWMA
)
1311 cl
->ewma_log
= lss
->ewma_log
;
1312 if (lss
->change
&TCF_CBQ_LSS_AVPKT
)
1313 cl
->avpkt
= lss
->avpkt
;
1314 if (lss
->change
&TCF_CBQ_LSS_MINIDLE
)
1315 cl
->minidle
= -(long)lss
->minidle
;
1316 if (lss
->change
&TCF_CBQ_LSS_MAXIDLE
) {
1317 cl
->maxidle
= lss
->maxidle
;
1318 cl
->avgidle
= lss
->maxidle
;
1320 if (lss
->change
&TCF_CBQ_LSS_OFFTIME
)
1321 cl
->offtime
= lss
->offtime
;
1325 static void cbq_rmprio(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
1327 q
->nclasses
[cl
->priority
]--;
1328 q
->quanta
[cl
->priority
] -= cl
->weight
;
1329 cbq_normalize_quanta(q
, cl
->priority
);
1332 static void cbq_addprio(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
1334 q
->nclasses
[cl
->priority
]++;
1335 q
->quanta
[cl
->priority
] += cl
->weight
;
1336 cbq_normalize_quanta(q
, cl
->priority
);
1339 static int cbq_set_wrr(struct cbq_class
*cl
, struct tc_cbq_wrropt
*wrr
)
1341 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
1344 cl
->allot
= wrr
->allot
;
1346 cl
->weight
= wrr
->weight
;
1347 if (wrr
->priority
) {
1348 cl
->priority
= wrr
->priority
-1;
1349 cl
->cpriority
= cl
->priority
;
1350 if (cl
->priority
>= cl
->priority2
)
1351 cl
->priority2
= TC_CBQ_MAXPRIO
-1;
1358 static int cbq_set_overlimit(struct cbq_class
*cl
, struct tc_cbq_ovl
*ovl
)
1360 switch (ovl
->strategy
) {
1361 case TC_CBQ_OVL_CLASSIC
:
1362 cl
->overlimit
= cbq_ovl_classic
;
1364 case TC_CBQ_OVL_DELAY
:
1365 cl
->overlimit
= cbq_ovl_delay
;
1367 case TC_CBQ_OVL_LOWPRIO
:
1368 if (ovl
->priority2
-1 >= TC_CBQ_MAXPRIO
||
1369 ovl
->priority2
-1 <= cl
->priority
)
1371 cl
->priority2
= ovl
->priority2
-1;
1372 cl
->overlimit
= cbq_ovl_lowprio
;
1374 case TC_CBQ_OVL_DROP
:
1375 cl
->overlimit
= cbq_ovl_drop
;
1377 case TC_CBQ_OVL_RCLASSIC
:
1378 cl
->overlimit
= cbq_ovl_rclassic
;
1383 cl
->penalty
= (ovl
->penalty
*HZ
)/1000;
1387 #ifdef CONFIG_NET_CLS_POLICE
1388 static int cbq_set_police(struct cbq_class
*cl
, struct tc_cbq_police
*p
)
1390 cl
->police
= p
->police
;
1392 if (cl
->q
->handle
) {
1393 if (p
->police
== TC_POLICE_RECLASSIFY
)
1394 cl
->q
->reshape_fail
= cbq_reshape_fail
;
1396 cl
->q
->reshape_fail
= NULL
;
1402 static int cbq_set_fopt(struct cbq_class
*cl
, struct tc_cbq_fopt
*fopt
)
1404 cbq_change_defmap(cl
, fopt
->split
, fopt
->defmap
, fopt
->defchange
);
1408 static int cbq_init(struct Qdisc
*sch
, struct rtattr
*opt
)
1410 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1411 struct rtattr
*tb
[TCA_CBQ_MAX
];
1412 struct tc_ratespec
*r
;
1414 if (rtattr_parse_nested(tb
, TCA_CBQ_MAX
, opt
) < 0 ||
1415 tb
[TCA_CBQ_RTAB
-1] == NULL
|| tb
[TCA_CBQ_RATE
-1] == NULL
||
1416 RTA_PAYLOAD(tb
[TCA_CBQ_RATE
-1]) < sizeof(struct tc_ratespec
))
1419 if (tb
[TCA_CBQ_LSSOPT
-1] &&
1420 RTA_PAYLOAD(tb
[TCA_CBQ_LSSOPT
-1]) < sizeof(struct tc_cbq_lssopt
))
1423 r
= RTA_DATA(tb
[TCA_CBQ_RATE
-1]);
1425 if ((q
->link
.R_tab
= qdisc_get_rtab(r
, tb
[TCA_CBQ_RTAB
-1])) == NULL
)
1429 q
->link
.sibling
= &q
->link
;
1430 q
->link
.classid
= sch
->handle
;
1431 q
->link
.qdisc
= sch
;
1432 if (!(q
->link
.q
= qdisc_create_dflt(sch
->dev
, &pfifo_qdisc_ops
)))
1433 q
->link
.q
= &noop_qdisc
;
1435 q
->link
.priority
= TC_CBQ_MAXPRIO
-1;
1436 q
->link
.priority2
= TC_CBQ_MAXPRIO
-1;
1437 q
->link
.cpriority
= TC_CBQ_MAXPRIO
-1;
1438 q
->link
.ovl_strategy
= TC_CBQ_OVL_CLASSIC
;
1439 q
->link
.overlimit
= cbq_ovl_classic
;
1440 q
->link
.allot
= psched_mtu(sch
->dev
);
1441 q
->link
.quantum
= q
->link
.allot
;
1442 q
->link
.weight
= q
->link
.R_tab
->rate
.rate
;
1444 q
->link
.ewma_log
= TC_CBQ_DEF_EWMA
;
1445 q
->link
.avpkt
= q
->link
.allot
/2;
1446 q
->link
.minidle
= -0x7FFFFFFF;
1447 q
->link
.stats_lock
= &sch
->dev
->queue_lock
;
1449 init_timer(&q
->wd_timer
);
1450 q
->wd_timer
.data
= (unsigned long)sch
;
1451 q
->wd_timer
.function
= cbq_watchdog
;
1452 init_timer(&q
->delay_timer
);
1453 q
->delay_timer
.data
= (unsigned long)sch
;
1454 q
->delay_timer
.function
= cbq_undelay
;
1455 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1456 PSCHED_GET_TIME(q
->now
);
1459 cbq_link_class(&q
->link
);
1461 if (tb
[TCA_CBQ_LSSOPT
-1])
1462 cbq_set_lss(&q
->link
, RTA_DATA(tb
[TCA_CBQ_LSSOPT
-1]));
1464 cbq_addprio(q
, &q
->link
);
1468 static __inline__
int cbq_dump_rate(struct sk_buff
*skb
, struct cbq_class
*cl
)
1470 unsigned char *b
= skb
->tail
;
1472 RTA_PUT(skb
, TCA_CBQ_RATE
, sizeof(cl
->R_tab
->rate
), &cl
->R_tab
->rate
);
1476 skb_trim(skb
, b
- skb
->data
);
1480 static __inline__
int cbq_dump_lss(struct sk_buff
*skb
, struct cbq_class
*cl
)
1482 unsigned char *b
= skb
->tail
;
1483 struct tc_cbq_lssopt opt
;
1486 if (cl
->borrow
== NULL
)
1487 opt
.flags
|= TCF_CBQ_LSS_BOUNDED
;
1488 if (cl
->share
== NULL
)
1489 opt
.flags
|= TCF_CBQ_LSS_ISOLATED
;
1490 opt
.ewma_log
= cl
->ewma_log
;
1491 opt
.level
= cl
->level
;
1492 opt
.avpkt
= cl
->avpkt
;
1493 opt
.maxidle
= cl
->maxidle
;
1494 opt
.minidle
= (u32
)(-cl
->minidle
);
1495 opt
.offtime
= cl
->offtime
;
1497 RTA_PUT(skb
, TCA_CBQ_LSSOPT
, sizeof(opt
), &opt
);
1501 skb_trim(skb
, b
- skb
->data
);
1505 static __inline__
int cbq_dump_wrr(struct sk_buff
*skb
, struct cbq_class
*cl
)
1507 unsigned char *b
= skb
->tail
;
1508 struct tc_cbq_wrropt opt
;
1511 opt
.allot
= cl
->allot
;
1512 opt
.priority
= cl
->priority
+1;
1513 opt
.cpriority
= cl
->cpriority
+1;
1514 opt
.weight
= cl
->weight
;
1515 RTA_PUT(skb
, TCA_CBQ_WRROPT
, sizeof(opt
), &opt
);
1519 skb_trim(skb
, b
- skb
->data
);
1523 static __inline__
int cbq_dump_ovl(struct sk_buff
*skb
, struct cbq_class
*cl
)
1525 unsigned char *b
= skb
->tail
;
1526 struct tc_cbq_ovl opt
;
1528 opt
.strategy
= cl
->ovl_strategy
;
1529 opt
.priority2
= cl
->priority2
+1;
1531 opt
.penalty
= (cl
->penalty
*1000)/HZ
;
1532 RTA_PUT(skb
, TCA_CBQ_OVL_STRATEGY
, sizeof(opt
), &opt
);
1536 skb_trim(skb
, b
- skb
->data
);
1540 static __inline__
int cbq_dump_fopt(struct sk_buff
*skb
, struct cbq_class
*cl
)
1542 unsigned char *b
= skb
->tail
;
1543 struct tc_cbq_fopt opt
;
1545 if (cl
->split
|| cl
->defmap
) {
1546 opt
.split
= cl
->split
? cl
->split
->classid
: 0;
1547 opt
.defmap
= cl
->defmap
;
1549 RTA_PUT(skb
, TCA_CBQ_FOPT
, sizeof(opt
), &opt
);
1554 skb_trim(skb
, b
- skb
->data
);
1558 #ifdef CONFIG_NET_CLS_POLICE
1559 static __inline__
int cbq_dump_police(struct sk_buff
*skb
, struct cbq_class
*cl
)
1561 unsigned char *b
= skb
->tail
;
1562 struct tc_cbq_police opt
;
1565 opt
.police
= cl
->police
;
1568 RTA_PUT(skb
, TCA_CBQ_POLICE
, sizeof(opt
), &opt
);
1573 skb_trim(skb
, b
- skb
->data
);
1578 static int cbq_dump_attr(struct sk_buff
*skb
, struct cbq_class
*cl
)
1580 if (cbq_dump_lss(skb
, cl
) < 0 ||
1581 cbq_dump_rate(skb
, cl
) < 0 ||
1582 cbq_dump_wrr(skb
, cl
) < 0 ||
1583 cbq_dump_ovl(skb
, cl
) < 0 ||
1584 #ifdef CONFIG_NET_CLS_POLICE
1585 cbq_dump_police(skb
, cl
) < 0 ||
1587 cbq_dump_fopt(skb
, cl
) < 0)
1592 static int cbq_dump(struct Qdisc
*sch
, struct sk_buff
*skb
)
1594 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1595 unsigned char *b
= skb
->tail
;
1598 rta
= (struct rtattr
*)b
;
1599 RTA_PUT(skb
, TCA_OPTIONS
, 0, NULL
);
1600 if (cbq_dump_attr(skb
, &q
->link
) < 0)
1601 goto rtattr_failure
;
1602 rta
->rta_len
= skb
->tail
- b
;
1606 skb_trim(skb
, b
- skb
->data
);
1611 cbq_dump_stats(struct Qdisc
*sch
, struct gnet_dump
*d
)
1613 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1615 q
->link
.xstats
.avgidle
= q
->link
.avgidle
;
1616 return gnet_stats_copy_app(d
, &q
->link
.xstats
, sizeof(q
->link
.xstats
));
1620 cbq_dump_class(struct Qdisc
*sch
, unsigned long arg
,
1621 struct sk_buff
*skb
, struct tcmsg
*tcm
)
1623 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1624 unsigned char *b
= skb
->tail
;
1628 tcm
->tcm_parent
= cl
->tparent
->classid
;
1630 tcm
->tcm_parent
= TC_H_ROOT
;
1631 tcm
->tcm_handle
= cl
->classid
;
1632 tcm
->tcm_info
= cl
->q
->handle
;
1634 rta
= (struct rtattr
*)b
;
1635 RTA_PUT(skb
, TCA_OPTIONS
, 0, NULL
);
1636 if (cbq_dump_attr(skb
, cl
) < 0)
1637 goto rtattr_failure
;
1638 rta
->rta_len
= skb
->tail
- b
;
1642 skb_trim(skb
, b
- skb
->data
);
1647 cbq_dump_class_stats(struct Qdisc
*sch
, unsigned long arg
,
1648 struct gnet_dump
*d
)
1650 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1651 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1653 cl
->qstats
.qlen
= cl
->q
->q
.qlen
;
1654 cl
->xstats
.avgidle
= cl
->avgidle
;
1655 cl
->xstats
.undertime
= 0;
1657 if (!PSCHED_IS_PASTPERFECT(cl
->undertime
))
1658 cl
->xstats
.undertime
= PSCHED_TDIFF(cl
->undertime
, q
->now
);
1660 if (gnet_stats_copy_basic(d
, &cl
->bstats
) < 0 ||
1661 #ifdef CONFIG_NET_ESTIMATOR
1662 gnet_stats_copy_rate_est(d
, &cl
->rate_est
) < 0 ||
1664 gnet_stats_copy_queue(d
, &cl
->qstats
) < 0)
1667 return gnet_stats_copy_app(d
, &cl
->xstats
, sizeof(cl
->xstats
));
1670 static int cbq_graft(struct Qdisc
*sch
, unsigned long arg
, struct Qdisc
*new,
1673 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1677 if ((new = qdisc_create_dflt(sch
->dev
, &pfifo_qdisc_ops
)) == NULL
)
1680 #ifdef CONFIG_NET_CLS_POLICE
1681 if (cl
->police
== TC_POLICE_RECLASSIFY
)
1682 new->reshape_fail
= cbq_reshape_fail
;
1688 sch
->q
.qlen
-= (*old
)->q
.qlen
;
1690 sch_tree_unlock(sch
);
1697 static struct Qdisc
*
1698 cbq_leaf(struct Qdisc
*sch
, unsigned long arg
)
1700 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1702 return cl
? cl
->q
: NULL
;
1705 static unsigned long cbq_get(struct Qdisc
*sch
, u32 classid
)
1707 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1708 struct cbq_class
*cl
= cbq_class_lookup(q
, classid
);
1712 return (unsigned long)cl
;
1717 static void cbq_destroy_filters(struct cbq_class
*cl
)
1719 struct tcf_proto
*tp
;
1721 while ((tp
= cl
->filter_list
) != NULL
) {
1722 cl
->filter_list
= tp
->next
;
1727 static void cbq_destroy_class(struct Qdisc
*sch
, struct cbq_class
*cl
)
1729 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1731 BUG_TRAP(!cl
->filters
);
1733 cbq_destroy_filters(cl
);
1734 qdisc_destroy(cl
->q
);
1735 qdisc_put_rtab(cl
->R_tab
);
1736 #ifdef CONFIG_NET_ESTIMATOR
1737 gen_kill_estimator(&cl
->bstats
, &cl
->rate_est
);
1744 cbq_destroy(struct Qdisc
* sch
)
1746 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1747 struct cbq_class
*cl
;
1750 #ifdef CONFIG_NET_CLS_POLICE
1754 * Filters must be destroyed first because we don't destroy the
1755 * classes from root to leafs which means that filters can still
1756 * be bound to classes which have been destroyed already. --TGR '04
1758 for (h
= 0; h
< 16; h
++)
1759 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
)
1760 cbq_destroy_filters(cl
);
1762 for (h
= 0; h
< 16; h
++) {
1763 struct cbq_class
*next
;
1765 for (cl
= q
->classes
[h
]; cl
; cl
= next
) {
1767 cbq_destroy_class(sch
, cl
);
1772 static void cbq_put(struct Qdisc
*sch
, unsigned long arg
)
1774 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1776 if (--cl
->refcnt
== 0) {
1777 #ifdef CONFIG_NET_CLS_POLICE
1778 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1780 spin_lock_bh(&sch
->dev
->queue_lock
);
1781 if (q
->rx_class
== cl
)
1783 spin_unlock_bh(&sch
->dev
->queue_lock
);
1786 cbq_destroy_class(sch
, cl
);
1791 cbq_change_class(struct Qdisc
*sch
, u32 classid
, u32 parentid
, struct rtattr
**tca
,
1795 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1796 struct cbq_class
*cl
= (struct cbq_class
*)*arg
;
1797 struct rtattr
*opt
= tca
[TCA_OPTIONS
-1];
1798 struct rtattr
*tb
[TCA_CBQ_MAX
];
1799 struct cbq_class
*parent
;
1800 struct qdisc_rate_table
*rtab
= NULL
;
1802 if (opt
==NULL
|| rtattr_parse_nested(tb
, TCA_CBQ_MAX
, opt
))
1805 if (tb
[TCA_CBQ_OVL_STRATEGY
-1] &&
1806 RTA_PAYLOAD(tb
[TCA_CBQ_OVL_STRATEGY
-1]) < sizeof(struct tc_cbq_ovl
))
1809 if (tb
[TCA_CBQ_FOPT
-1] &&
1810 RTA_PAYLOAD(tb
[TCA_CBQ_FOPT
-1]) < sizeof(struct tc_cbq_fopt
))
1813 if (tb
[TCA_CBQ_RATE
-1] &&
1814 RTA_PAYLOAD(tb
[TCA_CBQ_RATE
-1]) < sizeof(struct tc_ratespec
))
1817 if (tb
[TCA_CBQ_LSSOPT
-1] &&
1818 RTA_PAYLOAD(tb
[TCA_CBQ_LSSOPT
-1]) < sizeof(struct tc_cbq_lssopt
))
1821 if (tb
[TCA_CBQ_WRROPT
-1] &&
1822 RTA_PAYLOAD(tb
[TCA_CBQ_WRROPT
-1]) < sizeof(struct tc_cbq_wrropt
))
1825 #ifdef CONFIG_NET_CLS_POLICE
1826 if (tb
[TCA_CBQ_POLICE
-1] &&
1827 RTA_PAYLOAD(tb
[TCA_CBQ_POLICE
-1]) < sizeof(struct tc_cbq_police
))
1834 if (cl
->tparent
&& cl
->tparent
->classid
!= parentid
)
1836 if (!cl
->tparent
&& parentid
!= TC_H_ROOT
)
1840 if (tb
[TCA_CBQ_RATE
-1]) {
1841 rtab
= qdisc_get_rtab(RTA_DATA(tb
[TCA_CBQ_RATE
-1]), tb
[TCA_CBQ_RTAB
-1]);
1846 /* Change class parameters */
1849 if (cl
->next_alive
!= NULL
)
1850 cbq_deactivate_class(cl
);
1853 rtab
= xchg(&cl
->R_tab
, rtab
);
1854 qdisc_put_rtab(rtab
);
1857 if (tb
[TCA_CBQ_LSSOPT
-1])
1858 cbq_set_lss(cl
, RTA_DATA(tb
[TCA_CBQ_LSSOPT
-1]));
1860 if (tb
[TCA_CBQ_WRROPT
-1]) {
1862 cbq_set_wrr(cl
, RTA_DATA(tb
[TCA_CBQ_WRROPT
-1]));
1865 if (tb
[TCA_CBQ_OVL_STRATEGY
-1])
1866 cbq_set_overlimit(cl
, RTA_DATA(tb
[TCA_CBQ_OVL_STRATEGY
-1]));
1868 #ifdef CONFIG_NET_CLS_POLICE
1869 if (tb
[TCA_CBQ_POLICE
-1])
1870 cbq_set_police(cl
, RTA_DATA(tb
[TCA_CBQ_POLICE
-1]));
1873 if (tb
[TCA_CBQ_FOPT
-1])
1874 cbq_set_fopt(cl
, RTA_DATA(tb
[TCA_CBQ_FOPT
-1]));
1877 cbq_activate_class(cl
);
1879 sch_tree_unlock(sch
);
1881 #ifdef CONFIG_NET_ESTIMATOR
1882 if (tca
[TCA_RATE
-1])
1883 gen_replace_estimator(&cl
->bstats
, &cl
->rate_est
,
1884 cl
->stats_lock
, tca
[TCA_RATE
-1]);
1889 if (parentid
== TC_H_ROOT
)
1892 if (tb
[TCA_CBQ_WRROPT
-1] == NULL
|| tb
[TCA_CBQ_RATE
-1] == NULL
||
1893 tb
[TCA_CBQ_LSSOPT
-1] == NULL
)
1896 rtab
= qdisc_get_rtab(RTA_DATA(tb
[TCA_CBQ_RATE
-1]), tb
[TCA_CBQ_RTAB
-1]);
1902 if (TC_H_MAJ(classid
^sch
->handle
) || cbq_class_lookup(q
, classid
))
1906 classid
= TC_H_MAKE(sch
->handle
,0x8000);
1908 for (i
=0; i
<0x8000; i
++) {
1909 if (++q
->hgenerator
>= 0x8000)
1911 if (cbq_class_lookup(q
, classid
|q
->hgenerator
) == NULL
)
1917 classid
= classid
|q
->hgenerator
;
1922 parent
= cbq_class_lookup(q
, parentid
);
1929 cl
= kzalloc(sizeof(*cl
), GFP_KERNEL
);
1935 if (!(cl
->q
= qdisc_create_dflt(sch
->dev
, &pfifo_qdisc_ops
)))
1936 cl
->q
= &noop_qdisc
;
1937 cl
->classid
= classid
;
1938 cl
->tparent
= parent
;
1940 cl
->allot
= parent
->allot
;
1941 cl
->quantum
= cl
->allot
;
1942 cl
->weight
= cl
->R_tab
->rate
.rate
;
1943 cl
->stats_lock
= &sch
->dev
->queue_lock
;
1947 cl
->borrow
= cl
->tparent
;
1948 if (cl
->tparent
!= &q
->link
)
1949 cl
->share
= cl
->tparent
;
1950 cbq_adjust_levels(parent
);
1951 cl
->minidle
= -0x7FFFFFFF;
1952 cbq_set_lss(cl
, RTA_DATA(tb
[TCA_CBQ_LSSOPT
-1]));
1953 cbq_set_wrr(cl
, RTA_DATA(tb
[TCA_CBQ_WRROPT
-1]));
1954 if (cl
->ewma_log
==0)
1955 cl
->ewma_log
= q
->link
.ewma_log
;
1957 cl
->maxidle
= q
->link
.maxidle
;
1959 cl
->avpkt
= q
->link
.avpkt
;
1960 cl
->overlimit
= cbq_ovl_classic
;
1961 if (tb
[TCA_CBQ_OVL_STRATEGY
-1])
1962 cbq_set_overlimit(cl
, RTA_DATA(tb
[TCA_CBQ_OVL_STRATEGY
-1]));
1963 #ifdef CONFIG_NET_CLS_POLICE
1964 if (tb
[TCA_CBQ_POLICE
-1])
1965 cbq_set_police(cl
, RTA_DATA(tb
[TCA_CBQ_POLICE
-1]));
1967 if (tb
[TCA_CBQ_FOPT
-1])
1968 cbq_set_fopt(cl
, RTA_DATA(tb
[TCA_CBQ_FOPT
-1]));
1969 sch_tree_unlock(sch
);
1971 #ifdef CONFIG_NET_ESTIMATOR
1972 if (tca
[TCA_RATE
-1])
1973 gen_new_estimator(&cl
->bstats
, &cl
->rate_est
,
1974 cl
->stats_lock
, tca
[TCA_RATE
-1]);
1977 *arg
= (unsigned long)cl
;
1981 qdisc_put_rtab(rtab
);
1985 static int cbq_delete(struct Qdisc
*sch
, unsigned long arg
)
1987 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1988 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1990 if (cl
->filters
|| cl
->children
|| cl
== &q
->link
)
1996 cbq_deactivate_class(cl
);
1998 if (q
->tx_borrowed
== cl
)
1999 q
->tx_borrowed
= q
->tx_class
;
2000 if (q
->tx_class
== cl
) {
2002 q
->tx_borrowed
= NULL
;
2004 #ifdef CONFIG_NET_CLS_POLICE
2005 if (q
->rx_class
== cl
)
2009 cbq_unlink_class(cl
);
2010 cbq_adjust_levels(cl
->tparent
);
2012 cbq_sync_defmap(cl
);
2015 sch_tree_unlock(sch
);
2017 if (--cl
->refcnt
== 0)
2018 cbq_destroy_class(sch
, cl
);
2023 static struct tcf_proto
**cbq_find_tcf(struct Qdisc
*sch
, unsigned long arg
)
2025 struct cbq_sched_data
*q
= qdisc_priv(sch
);
2026 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
2031 return &cl
->filter_list
;
2034 static unsigned long cbq_bind_filter(struct Qdisc
*sch
, unsigned long parent
,
2037 struct cbq_sched_data
*q
= qdisc_priv(sch
);
2038 struct cbq_class
*p
= (struct cbq_class
*)parent
;
2039 struct cbq_class
*cl
= cbq_class_lookup(q
, classid
);
2042 if (p
&& p
->level
<= cl
->level
)
2045 return (unsigned long)cl
;
2050 static void cbq_unbind_filter(struct Qdisc
*sch
, unsigned long arg
)
2052 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
2057 static void cbq_walk(struct Qdisc
*sch
, struct qdisc_walker
*arg
)
2059 struct cbq_sched_data
*q
= qdisc_priv(sch
);
2065 for (h
= 0; h
< 16; h
++) {
2066 struct cbq_class
*cl
;
2068 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
2069 if (arg
->count
< arg
->skip
) {
2073 if (arg
->fn(sch
, (unsigned long)cl
, arg
) < 0) {
2082 static struct Qdisc_class_ops cbq_class_ops
= {
2087 .change
= cbq_change_class
,
2088 .delete = cbq_delete
,
2090 .tcf_chain
= cbq_find_tcf
,
2091 .bind_tcf
= cbq_bind_filter
,
2092 .unbind_tcf
= cbq_unbind_filter
,
2093 .dump
= cbq_dump_class
,
2094 .dump_stats
= cbq_dump_class_stats
,
2097 static struct Qdisc_ops cbq_qdisc_ops
= {
2099 .cl_ops
= &cbq_class_ops
,
2101 .priv_size
= sizeof(struct cbq_sched_data
),
2102 .enqueue
= cbq_enqueue
,
2103 .dequeue
= cbq_dequeue
,
2104 .requeue
= cbq_requeue
,
2108 .destroy
= cbq_destroy
,
2111 .dump_stats
= cbq_dump_stats
,
2112 .owner
= THIS_MODULE
,
2115 static int __init
cbq_module_init(void)
2117 return register_qdisc(&cbq_qdisc_ops
);
2119 static void __exit
cbq_module_exit(void)
2121 unregister_qdisc(&cbq_qdisc_ops
);
2123 module_init(cbq_module_init
)
2124 module_exit(cbq_module_exit
)
2125 MODULE_LICENSE("GPL");