2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/string.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
24 #include <linux/errno.h>
25 #include <linux/interrupt.h>
26 #include <linux/if_ether.h>
27 #include <linux/inet.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/notifier.h>
32 #include <net/route.h>
33 #include <linux/skbuff.h>
35 #include <net/pkt_sched.h>
38 /* Class-Based Queueing (CBQ) algorithm.
39 =======================================
41 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
42 Management Models for Packet Networks",
43 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50 [4] Sally Floyd and Michael Speer, "Experimental Results
51 for Class-Based Queueing", 1998, not published.
53 -----------------------------------------------------------------------
55 Algorithm skeleton was taken from NS simulator cbq.cc.
56 If someone wants to check this code against the LBL version,
57 he should take into account that ONLY the skeleton was borrowed,
58 the implementation is different. Particularly:
60 --- The WRR algorithm is different. Our version looks more
61 reasonable (I hope) and works when quanta are allowed to be
62 less than MTU, which is always the case when real time classes
63 have small rates. Note, that the statement of [3] is
64 incomplete, delay may actually be estimated even if class
65 per-round allotment is less than MTU. Namely, if per-round
66 allotment is W*r_i, and r_1+...+r_k = r < 1
68 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70 In the worst case we have IntServ estimate with D = W*r+k*MTU
71 and C = MTU*r. The proof (if correct at all) is trivial.
74 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
75 interpret some places, which look like wrong translations
76 from NS. Anyone is advised to find these differences
77 and explain to me, why I am wrong 8).
79 --- Linux has no EOI event, so that we cannot estimate true class
80 idle time. Workaround is to consider the next dequeue event
81 as sign that previous packet is finished. This is wrong because of
82 internal device queueing, but on a permanently loaded link it is true.
83 Moreover, combined with clock integrator, this scheme looks
84 very close to an ideal solution. */
86 struct cbq_sched_data
;
91 struct cbq_class
*next
; /* hash table link */
92 struct cbq_class
*next_alive
; /* next class with backlog in this priority band */
96 unsigned char priority
; /* class priority */
97 unsigned char priority2
; /* priority to be used after overlimit */
98 unsigned char ewma_log
; /* time constant for idle time calculation */
99 unsigned char ovl_strategy
;
100 #ifdef CONFIG_NET_CLS_POLICE
101 unsigned char police
;
106 /* Link-sharing scheduler parameters */
107 long maxidle
; /* Class parameters: see below. */
111 struct qdisc_rate_table
*R_tab
;
113 /* Overlimit strategy parameters */
114 void (*overlimit
)(struct cbq_class
*cl
);
115 psched_tdiff_t penalty
;
117 /* General scheduler (WRR) parameters */
119 long quantum
; /* Allotment per WRR round */
120 long weight
; /* Relative allotment: see below */
122 struct Qdisc
*qdisc
; /* Ptr to CBQ discipline */
123 struct cbq_class
*split
; /* Ptr to split node */
124 struct cbq_class
*share
; /* Ptr to LS parent in the class tree */
125 struct cbq_class
*tparent
; /* Ptr to tree parent in the class tree */
126 struct cbq_class
*borrow
; /* NULL if class is bandwidth limited;
128 struct cbq_class
*sibling
; /* Sibling chain */
129 struct cbq_class
*children
; /* Pointer to children chain */
131 struct Qdisc
*q
; /* Elementary queueing discipline */
135 unsigned char cpriority
; /* Effective priority */
136 unsigned char delayed
;
137 unsigned char level
; /* level of the class in hierarchy:
138 0 for leaf classes, and maximal
139 level of children + 1 for nodes.
142 psched_time_t last
; /* Last end of service */
143 psched_time_t undertime
;
145 long deficit
; /* Saved deficit for WRR */
146 psched_time_t penalized
;
147 struct gnet_stats_basic bstats
;
148 struct gnet_stats_queue qstats
;
149 struct gnet_stats_rate_est rate_est
;
150 spinlock_t
*stats_lock
;
151 struct tc_cbq_xstats xstats
;
153 struct tcf_proto
*filter_list
;
158 struct cbq_class
*defaults
[TC_PRIO_MAX
+1];
161 struct cbq_sched_data
163 struct cbq_class
*classes
[16]; /* Hash table of all classes */
164 int nclasses
[TC_CBQ_MAXPRIO
+1];
165 unsigned quanta
[TC_CBQ_MAXPRIO
+1];
167 struct cbq_class link
;
170 struct cbq_class
*active
[TC_CBQ_MAXPRIO
+1]; /* List of all classes
173 #ifdef CONFIG_NET_CLS_POLICE
174 struct cbq_class
*rx_class
;
176 struct cbq_class
*tx_class
;
177 struct cbq_class
*tx_borrowed
;
179 psched_time_t now
; /* Cached timestamp */
180 psched_time_t now_rt
; /* Cached real time */
183 struct hrtimer delay_timer
;
184 struct qdisc_watchdog watchdog
; /* Watchdog timer,
188 psched_tdiff_t wd_expires
;
/* L2T: look up the per-packet cost for a packet of 'len' bytes in the
 * class's precomputed rate table, scaling the length down by the table's
 * cell_log granularity to form the index. */
194 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
197 static __inline__
unsigned cbq_hash(u32 h
)
204 static __inline__
struct cbq_class
*
205 cbq_class_lookup(struct cbq_sched_data
*q
, u32 classid
)
207 struct cbq_class
*cl
;
209 for (cl
= q
->classes
[cbq_hash(classid
)]; cl
; cl
= cl
->next
)
210 if (cl
->classid
== classid
)
215 #ifdef CONFIG_NET_CLS_POLICE
217 static struct cbq_class
*
218 cbq_reclassify(struct sk_buff
*skb
, struct cbq_class
*this)
220 struct cbq_class
*cl
, *new;
222 for (cl
= this->tparent
; cl
; cl
= cl
->tparent
)
223 if ((new = cl
->defaults
[TC_PRIO_BESTEFFORT
]) != NULL
&& new != this)
231 /* Classify packet. The procedure is pretty complicated, but
232 it allows us to combine link sharing and priority scheduling
235 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
236 so that it resolves to split nodes. Then packets are classified
237 by logical priority, or a more specific classifier may be attached
241 static struct cbq_class
*
242 cbq_classify(struct sk_buff
*skb
, struct Qdisc
*sch
, int *qerr
)
244 struct cbq_sched_data
*q
= qdisc_priv(sch
);
245 struct cbq_class
*head
= &q
->link
;
246 struct cbq_class
**defmap
;
247 struct cbq_class
*cl
= NULL
;
248 u32 prio
= skb
->priority
;
249 struct tcf_result res
;
252 * Step 1. If skb->priority points to one of our classes, use it.
254 if (TC_H_MAJ(prio
^sch
->handle
) == 0 &&
255 (cl
= cbq_class_lookup(q
, prio
)) != NULL
)
258 *qerr
= NET_XMIT_BYPASS
;
261 defmap
= head
->defaults
;
264 * Step 2+n. Apply classifier.
266 if (!head
->filter_list
|| (result
= tc_classify(skb
, head
->filter_list
, &res
)) < 0)
269 if ((cl
= (void*)res
.class) == NULL
) {
270 if (TC_H_MAJ(res
.classid
))
271 cl
= cbq_class_lookup(q
, res
.classid
);
272 else if ((cl
= defmap
[res
.classid
&TC_PRIO_MAX
]) == NULL
)
273 cl
= defmap
[TC_PRIO_BESTEFFORT
];
275 if (cl
== NULL
|| cl
->level
>= head
->level
)
279 #ifdef CONFIG_NET_CLS_ACT
283 *qerr
= NET_XMIT_SUCCESS
;
287 #elif defined(CONFIG_NET_CLS_POLICE)
289 case TC_POLICE_RECLASSIFY
:
290 return cbq_reclassify(skb
, cl
);
301 * Step 3+n. If classifier selected a link sharing class,
302 * apply agency specific classifier.
303 * Repeat this procedure until we hit a leaf node.
312 * Step 4. No success...
314 if (TC_H_MAJ(prio
) == 0 &&
315 !(cl
= head
->defaults
[prio
&TC_PRIO_MAX
]) &&
316 !(cl
= head
->defaults
[TC_PRIO_BESTEFFORT
]))
323 A packet has just been enqueued on the empty class.
324 cbq_activate_class adds it to the tail of active class list
325 of its priority band.
328 static __inline__
void cbq_activate_class(struct cbq_class
*cl
)
330 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
331 int prio
= cl
->cpriority
;
332 struct cbq_class
*cl_tail
;
334 cl_tail
= q
->active
[prio
];
335 q
->active
[prio
] = cl
;
337 if (cl_tail
!= NULL
) {
338 cl
->next_alive
= cl_tail
->next_alive
;
339 cl_tail
->next_alive
= cl
;
342 q
->activemask
|= (1<<prio
);
347 Unlink class from active chain.
348 Note that this same procedure is done directly in cbq_dequeue*
349 during round-robin procedure.
352 static void cbq_deactivate_class(struct cbq_class
*this)
354 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
355 int prio
= this->cpriority
;
356 struct cbq_class
*cl
;
357 struct cbq_class
*cl_prev
= q
->active
[prio
];
360 cl
= cl_prev
->next_alive
;
362 cl_prev
->next_alive
= cl
->next_alive
;
363 cl
->next_alive
= NULL
;
365 if (cl
== q
->active
[prio
]) {
366 q
->active
[prio
] = cl_prev
;
367 if (cl
== q
->active
[prio
]) {
368 q
->active
[prio
] = NULL
;
369 q
->activemask
&= ~(1<<prio
);
375 } while ((cl_prev
= cl
) != q
->active
[prio
]);
379 cbq_mark_toplevel(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
381 int toplevel
= q
->toplevel
;
383 if (toplevel
> cl
->level
&& !(cl
->q
->flags
&TCQ_F_THROTTLED
)) {
387 PSCHED_GET_TIME(now
);
388 incr
= PSCHED_TDIFF(now
, q
->now_rt
);
389 PSCHED_TADD2(q
->now
, incr
, now
);
392 if (PSCHED_TLESS(cl
->undertime
, now
)) {
393 q
->toplevel
= cl
->level
;
396 } while ((cl
=cl
->borrow
) != NULL
&& toplevel
> cl
->level
);
401 cbq_enqueue(struct sk_buff
*skb
, struct Qdisc
*sch
)
403 struct cbq_sched_data
*q
= qdisc_priv(sch
);
406 struct cbq_class
*cl
= cbq_classify(skb
, sch
, &ret
);
408 #ifdef CONFIG_NET_CLS_POLICE
412 if (ret
== NET_XMIT_BYPASS
)
418 #ifdef CONFIG_NET_CLS_POLICE
419 cl
->q
->__parent
= sch
;
421 if ((ret
= cl
->q
->enqueue(skb
, cl
->q
)) == NET_XMIT_SUCCESS
) {
423 sch
->bstats
.packets
++;
424 sch
->bstats
.bytes
+=len
;
425 cbq_mark_toplevel(q
, cl
);
427 cbq_activate_class(cl
);
432 cbq_mark_toplevel(q
, cl
);
438 cbq_requeue(struct sk_buff
*skb
, struct Qdisc
*sch
)
440 struct cbq_sched_data
*q
= qdisc_priv(sch
);
441 struct cbq_class
*cl
;
444 if ((cl
= q
->tx_class
) == NULL
) {
451 cbq_mark_toplevel(q
, cl
);
453 #ifdef CONFIG_NET_CLS_POLICE
455 cl
->q
->__parent
= sch
;
457 if ((ret
= cl
->q
->ops
->requeue(skb
, cl
->q
)) == 0) {
459 sch
->qstats
.requeues
++;
461 cbq_activate_class(cl
);
469 /* Overlimit actions */
471 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473 static void cbq_ovl_classic(struct cbq_class
*cl
)
475 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
476 psched_tdiff_t delay
= PSCHED_TDIFF(cl
->undertime
, q
->now
);
479 delay
+= cl
->offtime
;
482 Class goes to sleep, so that it will have no
483 chance to work avgidle. Let's forgive it 8)
485 BTW cbq-2.0 has a crap in this
486 place, apparently they forgot to shift it by cl->ewma_log.
489 delay
-= (-cl
->avgidle
) - ((-cl
->avgidle
) >> cl
->ewma_log
);
490 if (cl
->avgidle
< cl
->minidle
)
491 cl
->avgidle
= cl
->minidle
;
494 PSCHED_TADD2(q
->now
, delay
, cl
->undertime
);
496 cl
->xstats
.overactions
++;
499 if (q
->wd_expires
== 0 || q
->wd_expires
> delay
)
500 q
->wd_expires
= delay
;
502 /* Dirty work! We must schedule wakeups based on
503 real available rate, rather than leaf rate,
504 which may be tiny (even zero).
506 if (q
->toplevel
== TC_CBQ_MAXLEVEL
) {
508 psched_tdiff_t base_delay
= q
->wd_expires
;
510 for (b
= cl
->borrow
; b
; b
= b
->borrow
) {
511 delay
= PSCHED_TDIFF(b
->undertime
, q
->now
);
512 if (delay
< base_delay
) {
519 q
->wd_expires
= base_delay
;
523 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
527 static void cbq_ovl_rclassic(struct cbq_class
*cl
)
529 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
530 struct cbq_class
*this = cl
;
533 if (cl
->level
> q
->toplevel
) {
537 } while ((cl
= cl
->borrow
) != NULL
);
544 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
546 static void cbq_ovl_delay(struct cbq_class
*cl
)
548 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
549 psched_tdiff_t delay
= PSCHED_TDIFF(cl
->undertime
, q
->now
);
552 psched_time_t sched
= q
->now
;
555 delay
+= cl
->offtime
;
557 delay
-= (-cl
->avgidle
) - ((-cl
->avgidle
) >> cl
->ewma_log
);
558 if (cl
->avgidle
< cl
->minidle
)
559 cl
->avgidle
= cl
->minidle
;
560 PSCHED_TADD2(q
->now
, delay
, cl
->undertime
);
563 sched
+= delay
+ cl
->penalty
;
564 cl
->penalized
= sched
;
565 cl
->cpriority
= TC_CBQ_MAXPRIO
;
566 q
->pmask
|= (1<<TC_CBQ_MAXPRIO
);
568 expires
= ktime_set(0, 0);
569 expires
= ktime_add_ns(expires
, PSCHED_US2NS(sched
));
570 if (hrtimer_try_to_cancel(&q
->delay_timer
) &&
571 ktime_to_ns(ktime_sub(q
->delay_timer
.expires
,
573 q
->delay_timer
.expires
= expires
;
574 hrtimer_restart(&q
->delay_timer
);
576 cl
->xstats
.overactions
++;
581 if (q
->wd_expires
== 0 || q
->wd_expires
> delay
)
582 q
->wd_expires
= delay
;
585 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
587 static void cbq_ovl_lowprio(struct cbq_class
*cl
)
589 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
591 cl
->penalized
= q
->now
+ cl
->penalty
;
593 if (cl
->cpriority
!= cl
->priority2
) {
594 cl
->cpriority
= cl
->priority2
;
595 q
->pmask
|= (1<<cl
->cpriority
);
596 cl
->xstats
.overactions
++;
601 /* TC_CBQ_OVL_DROP: penalize class by dropping */
/* TC_CBQ_OVL_DROP overlimit handler: if the class's inner qdisc supports
 * dropping, attempt to drop one of its queued packets, and account an
 * overaction in the class's extended stats.
 * NOTE(review): the extraction is missing interior lines of this function
 * (e.g. whatever runs on a successful drop) — verify against the original
 * source before relying on this summary. */
603 static void cbq_ovl_drop(struct cbq_class
*cl
)
605 if (cl
->q
->ops
->drop
)
606 if (cl
->q
->ops
->drop(cl
->q
))
608 cl
->xstats
.overactions
++;
612 static psched_tdiff_t
cbq_undelay_prio(struct cbq_sched_data
*q
, int prio
,
615 struct cbq_class
*cl
;
616 struct cbq_class
*cl_prev
= q
->active
[prio
];
617 psched_time_t sched
= now
;
623 cl
= cl_prev
->next_alive
;
624 if (now
- cl
->penalized
> 0) {
625 cl_prev
->next_alive
= cl
->next_alive
;
626 cl
->next_alive
= NULL
;
627 cl
->cpriority
= cl
->priority
;
629 cbq_activate_class(cl
);
631 if (cl
== q
->active
[prio
]) {
632 q
->active
[prio
] = cl_prev
;
633 if (cl
== q
->active
[prio
]) {
634 q
->active
[prio
] = NULL
;
639 cl
= cl_prev
->next_alive
;
640 } else if (sched
- cl
->penalized
> 0)
641 sched
= cl
->penalized
;
642 } while ((cl_prev
= cl
) != q
->active
[prio
]);
647 static enum hrtimer_restart
cbq_undelay(struct hrtimer
*timer
)
649 struct cbq_sched_data
*q
= container_of(timer
, struct cbq_sched_data
,
651 struct Qdisc
*sch
= q
->watchdog
.qdisc
;
653 psched_tdiff_t delay
= 0;
656 PSCHED_GET_TIME(now
);
662 int prio
= ffz(~pmask
);
667 tmp
= cbq_undelay_prio(q
, prio
, now
);
670 if (tmp
< delay
|| delay
== 0)
678 time
= ktime_set(0, 0);
679 time
= ktime_add_ns(time
, PSCHED_US2NS(now
+ delay
));
680 hrtimer_start(&q
->delay_timer
, time
, HRTIMER_MODE_ABS
);
683 sch
->flags
&= ~TCQ_F_THROTTLED
;
684 netif_schedule(sch
->dev
);
685 return HRTIMER_NORESTART
;
689 #ifdef CONFIG_NET_CLS_POLICE
691 static int cbq_reshape_fail(struct sk_buff
*skb
, struct Qdisc
*child
)
694 struct Qdisc
*sch
= child
->__parent
;
695 struct cbq_sched_data
*q
= qdisc_priv(sch
);
696 struct cbq_class
*cl
= q
->rx_class
;
700 if (cl
&& (cl
= cbq_reclassify(skb
, cl
)) != NULL
) {
702 cbq_mark_toplevel(q
, cl
);
705 cl
->q
->__parent
= sch
;
707 if (cl
->q
->enqueue(skb
, cl
->q
) == 0) {
709 sch
->bstats
.packets
++;
710 sch
->bstats
.bytes
+=len
;
712 cbq_activate_class(cl
);
725 This is a mission-critical procedure.
727 We "regenerate" toplevel cutoff, if transmitting class
728 has backlog and it is not regulated. It is not part of
729 original CBQ description, but looks more reasonable.
730 Probably, it is wrong. This question needs further investigation.
733 static __inline__
void
734 cbq_update_toplevel(struct cbq_sched_data
*q
, struct cbq_class
*cl
,
735 struct cbq_class
*borrowed
)
737 if (cl
&& q
->toplevel
>= borrowed
->level
) {
738 if (cl
->q
->q
.qlen
> 1) {
740 if (PSCHED_IS_PASTPERFECT(borrowed
->undertime
)) {
741 q
->toplevel
= borrowed
->level
;
744 } while ((borrowed
=borrowed
->borrow
) != NULL
);
747 /* It is not necessary now. Uncommenting it
748 will save CPU cycles, but decrease fairness.
750 q
->toplevel
= TC_CBQ_MAXLEVEL
;
756 cbq_update(struct cbq_sched_data
*q
)
758 struct cbq_class
*this = q
->tx_class
;
759 struct cbq_class
*cl
= this;
764 for ( ; cl
; cl
= cl
->share
) {
765 long avgidle
= cl
->avgidle
;
768 cl
->bstats
.packets
++;
769 cl
->bstats
.bytes
+= len
;
772 (now - last) is total time between packet right edges.
773 (last_pktlen/rate) is "virtual" busy time, so that
775 idle = (now - last) - last_pktlen/rate
778 idle
= PSCHED_TDIFF(q
->now
, cl
->last
);
779 if ((unsigned long)idle
> 128*1024*1024) {
780 avgidle
= cl
->maxidle
;
782 idle
-= L2T(cl
, len
);
784 /* true_avgidle := (1-W)*true_avgidle + W*idle,
785 where W=2^{-ewma_log}. But cl->avgidle is scaled:
786 cl->avgidle == true_avgidle/W,
789 avgidle
+= idle
- (avgidle
>>cl
->ewma_log
);
793 /* Overlimit or at-limit */
795 if (avgidle
< cl
->minidle
)
796 avgidle
= cl
->minidle
;
798 cl
->avgidle
= avgidle
;
800 /* Calculate expected time, when this class
801 will be allowed to send.
803 (1-W)*true_avgidle + W*delay = 0, i.e.
804 idle = (1/W - 1)*(-true_avgidle)
806 idle = (1 - W)*(-cl->avgidle);
808 idle
= (-avgidle
) - ((-avgidle
) >> cl
->ewma_log
);
812 To maintain the rate allocated to the class,
813 we add to undertime virtual clock,
814 necessary to complete transmitted packet.
815 (len/phys_bandwidth has been already passed
816 to the moment of cbq_update)
819 idle
-= L2T(&q
->link
, len
);
820 idle
+= L2T(cl
, len
);
822 PSCHED_AUDIT_TDIFF(idle
);
824 PSCHED_TADD2(q
->now
, idle
, cl
->undertime
);
828 PSCHED_SET_PASTPERFECT(cl
->undertime
);
829 if (avgidle
> cl
->maxidle
)
830 cl
->avgidle
= cl
->maxidle
;
832 cl
->avgidle
= avgidle
;
837 cbq_update_toplevel(q
, this, q
->tx_borrowed
);
840 static __inline__
struct cbq_class
*
841 cbq_under_limit(struct cbq_class
*cl
)
843 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
844 struct cbq_class
*this_cl
= cl
;
846 if (cl
->tparent
== NULL
)
849 if (PSCHED_IS_PASTPERFECT(cl
->undertime
) ||
850 !PSCHED_TLESS(q
->now
, cl
->undertime
)) {
856 /* It is very suspicious place. Now overlimit
857 action is generated for not bounded classes
858 only if link is completely congested.
859 Though it is in agree with ancestor-only paradigm,
860 it looks very stupid. Particularly,
861 it means that this chunk of code will either
862 never be called or result in strong amplification
863 of burstiness. Dangerous, silly, and, however,
864 no another solution exists.
866 if ((cl
= cl
->borrow
) == NULL
) {
867 this_cl
->qstats
.overlimits
++;
868 this_cl
->overlimit(this_cl
);
871 if (cl
->level
> q
->toplevel
)
873 } while (!PSCHED_IS_PASTPERFECT(cl
->undertime
) &&
874 PSCHED_TLESS(q
->now
, cl
->undertime
));
880 static __inline__
struct sk_buff
*
881 cbq_dequeue_prio(struct Qdisc
*sch
, int prio
)
883 struct cbq_sched_data
*q
= qdisc_priv(sch
);
884 struct cbq_class
*cl_tail
, *cl_prev
, *cl
;
888 cl_tail
= cl_prev
= q
->active
[prio
];
889 cl
= cl_prev
->next_alive
;
896 struct cbq_class
*borrow
= cl
;
899 (borrow
= cbq_under_limit(cl
)) == NULL
)
902 if (cl
->deficit
<= 0) {
903 /* Class exhausted its allotment per
904 this round. Switch to the next one.
907 cl
->deficit
+= cl
->quantum
;
911 skb
= cl
->q
->dequeue(cl
->q
);
913 /* Class did not give us any skb :-(
914 It could occur even if cl->q->q.qlen != 0
915 f.e. if cl->q == "tbf"
920 cl
->deficit
-= skb
->len
;
922 q
->tx_borrowed
= borrow
;
924 #ifndef CBQ_XSTATS_BORROWS_BYTES
925 borrow
->xstats
.borrows
++;
926 cl
->xstats
.borrows
++;
928 borrow
->xstats
.borrows
+= skb
->len
;
929 cl
->xstats
.borrows
+= skb
->len
;
932 q
->tx_len
= skb
->len
;
934 if (cl
->deficit
<= 0) {
935 q
->active
[prio
] = cl
;
937 cl
->deficit
+= cl
->quantum
;
942 if (cl
->q
->q
.qlen
== 0 || prio
!= cl
->cpriority
) {
943 /* Class is empty or penalized.
944 Unlink it from active chain.
946 cl_prev
->next_alive
= cl
->next_alive
;
947 cl
->next_alive
= NULL
;
949 /* Did cl_tail point to it? */
954 /* Was it the last class in this band? */
957 q
->active
[prio
] = NULL
;
958 q
->activemask
&= ~(1<<prio
);
960 cbq_activate_class(cl
);
964 q
->active
[prio
] = cl_tail
;
967 cbq_activate_class(cl
);
975 } while (cl_prev
!= cl_tail
);
978 q
->active
[prio
] = cl_prev
;
983 static __inline__
struct sk_buff
*
984 cbq_dequeue_1(struct Qdisc
*sch
)
986 struct cbq_sched_data
*q
= qdisc_priv(sch
);
990 activemask
= q
->activemask
&0xFF;
992 int prio
= ffz(~activemask
);
993 activemask
&= ~(1<<prio
);
994 skb
= cbq_dequeue_prio(sch
, prio
);
1001 static struct sk_buff
*
1002 cbq_dequeue(struct Qdisc
*sch
)
1004 struct sk_buff
*skb
;
1005 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1007 psched_tdiff_t incr
;
1009 PSCHED_GET_TIME(now
);
1010 incr
= PSCHED_TDIFF(now
, q
->now_rt
);
1013 psched_tdiff_t incr2
;
1014 /* Time integrator. We calculate EOS time
1015 by adding expected packet transmission time.
1016 If real time is greater, we warp artificial clock,
1019 cbq_time = max(real_time, work);
1021 incr2
= L2T(&q
->link
, q
->tx_len
);
1022 PSCHED_TADD(q
->now
, incr2
);
1024 if ((incr
-= incr2
) < 0)
1027 PSCHED_TADD(q
->now
, incr
);
1033 skb
= cbq_dequeue_1(sch
);
1036 sch
->flags
&= ~TCQ_F_THROTTLED
;
1040 /* All the classes are overlimit.
1044 1. Scheduler is empty.
1045 2. Toplevel cutoff inhibited borrowing.
1046 3. Root class is overlimit.
1048 Reset 2d and 3d conditions and retry.
1050 Note, that NS and cbq-2.0 are buggy, peeking
1051 an arbitrary class is appropriate for ancestor-only
1052 sharing, but not for toplevel algorithm.
1054 Our version is better, but slower, because it requires
1055 two passes, but it is unavoidable with top-level sharing.
1058 if (q
->toplevel
== TC_CBQ_MAXLEVEL
&&
1059 PSCHED_IS_PASTPERFECT(q
->link
.undertime
))
1062 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1063 PSCHED_SET_PASTPERFECT(q
->link
.undertime
);
1066 /* No packets in scheduler or nobody wants to give them to us :-(
1067 Sigh... start watchdog timer in the last case. */
1070 sch
->qstats
.overlimits
++;
1072 qdisc_watchdog_schedule(&q
->watchdog
,
1073 now
+ q
->wd_expires
);
1078 /* CBQ class maintenance routines */
1080 static void cbq_adjust_levels(struct cbq_class
*this)
1087 struct cbq_class
*cl
;
1089 if ((cl
= this->children
) != NULL
) {
1091 if (cl
->level
> level
)
1093 } while ((cl
= cl
->sibling
) != this->children
);
1095 this->level
= level
+1;
1096 } while ((this = this->tparent
) != NULL
);
1099 static void cbq_normalize_quanta(struct cbq_sched_data
*q
, int prio
)
1101 struct cbq_class
*cl
;
1104 if (q
->quanta
[prio
] == 0)
1107 for (h
=0; h
<16; h
++) {
1108 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
1109 /* BUGGGG... Beware! This expression suffer of
1110 arithmetic overflows!
1112 if (cl
->priority
== prio
) {
1113 cl
->quantum
= (cl
->weight
*cl
->allot
*q
->nclasses
[prio
])/
1116 if (cl
->quantum
<= 0 || cl
->quantum
>32*cl
->qdisc
->dev
->mtu
) {
1117 printk(KERN_WARNING
"CBQ: class %08x has bad quantum==%ld, repaired.\n", cl
->classid
, cl
->quantum
);
1118 cl
->quantum
= cl
->qdisc
->dev
->mtu
/2 + 1;
1124 static void cbq_sync_defmap(struct cbq_class
*cl
)
1126 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
1127 struct cbq_class
*split
= cl
->split
;
1134 for (i
=0; i
<=TC_PRIO_MAX
; i
++) {
1135 if (split
->defaults
[i
] == cl
&& !(cl
->defmap
&(1<<i
)))
1136 split
->defaults
[i
] = NULL
;
1139 for (i
=0; i
<=TC_PRIO_MAX
; i
++) {
1140 int level
= split
->level
;
1142 if (split
->defaults
[i
])
1145 for (h
=0; h
<16; h
++) {
1146 struct cbq_class
*c
;
1148 for (c
= q
->classes
[h
]; c
; c
= c
->next
) {
1149 if (c
->split
== split
&& c
->level
< level
&&
1151 split
->defaults
[i
] = c
;
1159 static void cbq_change_defmap(struct cbq_class
*cl
, u32 splitid
, u32 def
, u32 mask
)
1161 struct cbq_class
*split
= NULL
;
1164 if ((split
= cl
->split
) == NULL
)
1166 splitid
= split
->classid
;
1169 if (split
== NULL
|| split
->classid
!= splitid
) {
1170 for (split
= cl
->tparent
; split
; split
= split
->tparent
)
1171 if (split
->classid
== splitid
)
1178 if (cl
->split
!= split
) {
1180 cbq_sync_defmap(cl
);
1182 cl
->defmap
= def
&mask
;
1184 cl
->defmap
= (cl
->defmap
&~mask
)|(def
&mask
);
1186 cbq_sync_defmap(cl
);
1189 static void cbq_unlink_class(struct cbq_class
*this)
1191 struct cbq_class
*cl
, **clp
;
1192 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
1194 for (clp
= &q
->classes
[cbq_hash(this->classid
)]; (cl
= *clp
) != NULL
; clp
= &cl
->next
) {
1202 if (this->tparent
) {
1211 } while ((cl
= *clp
) != this->sibling
);
1213 if (this->tparent
->children
== this) {
1214 this->tparent
->children
= this->sibling
;
1215 if (this->sibling
== this)
1216 this->tparent
->children
= NULL
;
1219 BUG_TRAP(this->sibling
== this);
1223 static void cbq_link_class(struct cbq_class
*this)
1225 struct cbq_sched_data
*q
= qdisc_priv(this->qdisc
);
1226 unsigned h
= cbq_hash(this->classid
);
1227 struct cbq_class
*parent
= this->tparent
;
1229 this->sibling
= this;
1230 this->next
= q
->classes
[h
];
1231 q
->classes
[h
] = this;
1236 if (parent
->children
== NULL
) {
1237 parent
->children
= this;
1239 this->sibling
= parent
->children
->sibling
;
1240 parent
->children
->sibling
= this;
1244 static unsigned int cbq_drop(struct Qdisc
* sch
)
1246 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1247 struct cbq_class
*cl
, *cl_head
;
1251 for (prio
= TC_CBQ_MAXPRIO
; prio
>= 0; prio
--) {
1252 if ((cl_head
= q
->active
[prio
]) == NULL
)
1257 if (cl
->q
->ops
->drop
&& (len
= cl
->q
->ops
->drop(cl
->q
))) {
1260 cbq_deactivate_class(cl
);
1263 } while ((cl
= cl
->next_alive
) != cl_head
);
1269 cbq_reset(struct Qdisc
* sch
)
1271 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1272 struct cbq_class
*cl
;
1279 q
->tx_borrowed
= NULL
;
1280 qdisc_watchdog_cancel(&q
->watchdog
);
1281 hrtimer_cancel(&q
->delay_timer
);
1282 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1283 PSCHED_GET_TIME(q
->now
);
1286 for (prio
= 0; prio
<= TC_CBQ_MAXPRIO
; prio
++)
1287 q
->active
[prio
] = NULL
;
1289 for (h
= 0; h
< 16; h
++) {
1290 for (cl
= q
->classes
[h
]; cl
; cl
= cl
->next
) {
1293 cl
->next_alive
= NULL
;
1294 PSCHED_SET_PASTPERFECT(cl
->undertime
);
1295 cl
->avgidle
= cl
->maxidle
;
1296 cl
->deficit
= cl
->quantum
;
1297 cl
->cpriority
= cl
->priority
;
1304 static int cbq_set_lss(struct cbq_class
*cl
, struct tc_cbq_lssopt
*lss
)
1306 if (lss
->change
&TCF_CBQ_LSS_FLAGS
) {
1307 cl
->share
= (lss
->flags
&TCF_CBQ_LSS_ISOLATED
) ? NULL
: cl
->tparent
;
1308 cl
->borrow
= (lss
->flags
&TCF_CBQ_LSS_BOUNDED
) ? NULL
: cl
->tparent
;
1310 if (lss
->change
&TCF_CBQ_LSS_EWMA
)
1311 cl
->ewma_log
= lss
->ewma_log
;
1312 if (lss
->change
&TCF_CBQ_LSS_AVPKT
)
1313 cl
->avpkt
= lss
->avpkt
;
1314 if (lss
->change
&TCF_CBQ_LSS_MINIDLE
)
1315 cl
->minidle
= -(long)lss
->minidle
;
1316 if (lss
->change
&TCF_CBQ_LSS_MAXIDLE
) {
1317 cl
->maxidle
= lss
->maxidle
;
1318 cl
->avgidle
= lss
->maxidle
;
1320 if (lss
->change
&TCF_CBQ_LSS_OFFTIME
)
1321 cl
->offtime
= lss
->offtime
;
1325 static void cbq_rmprio(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
1327 q
->nclasses
[cl
->priority
]--;
1328 q
->quanta
[cl
->priority
] -= cl
->weight
;
1329 cbq_normalize_quanta(q
, cl
->priority
);
1332 static void cbq_addprio(struct cbq_sched_data
*q
, struct cbq_class
*cl
)
1334 q
->nclasses
[cl
->priority
]++;
1335 q
->quanta
[cl
->priority
] += cl
->weight
;
1336 cbq_normalize_quanta(q
, cl
->priority
);
1339 static int cbq_set_wrr(struct cbq_class
*cl
, struct tc_cbq_wrropt
*wrr
)
1341 struct cbq_sched_data
*q
= qdisc_priv(cl
->qdisc
);
1344 cl
->allot
= wrr
->allot
;
1346 cl
->weight
= wrr
->weight
;
1347 if (wrr
->priority
) {
1348 cl
->priority
= wrr
->priority
-1;
1349 cl
->cpriority
= cl
->priority
;
1350 if (cl
->priority
>= cl
->priority2
)
1351 cl
->priority2
= TC_CBQ_MAXPRIO
-1;
1358 static int cbq_set_overlimit(struct cbq_class
*cl
, struct tc_cbq_ovl
*ovl
)
1360 switch (ovl
->strategy
) {
1361 case TC_CBQ_OVL_CLASSIC
:
1362 cl
->overlimit
= cbq_ovl_classic
;
1364 case TC_CBQ_OVL_DELAY
:
1365 cl
->overlimit
= cbq_ovl_delay
;
1367 case TC_CBQ_OVL_LOWPRIO
:
1368 if (ovl
->priority2
-1 >= TC_CBQ_MAXPRIO
||
1369 ovl
->priority2
-1 <= cl
->priority
)
1371 cl
->priority2
= ovl
->priority2
-1;
1372 cl
->overlimit
= cbq_ovl_lowprio
;
1374 case TC_CBQ_OVL_DROP
:
1375 cl
->overlimit
= cbq_ovl_drop
;
1377 case TC_CBQ_OVL_RCLASSIC
:
1378 cl
->overlimit
= cbq_ovl_rclassic
;
1383 cl
->penalty
= ovl
->penalty
;
1387 #ifdef CONFIG_NET_CLS_POLICE
1388 static int cbq_set_police(struct cbq_class
*cl
, struct tc_cbq_police
*p
)
1390 cl
->police
= p
->police
;
1392 if (cl
->q
->handle
) {
1393 if (p
->police
== TC_POLICE_RECLASSIFY
)
1394 cl
->q
->reshape_fail
= cbq_reshape_fail
;
1396 cl
->q
->reshape_fail
= NULL
;
/* Apply filter options to a class: delegates to cbq_change_defmap() with
 * the requested split node id, default map and change mask.
 * NOTE(review): the extraction appears to have dropped this function's
 * braces and return statement — verify against the original source. */
1402 static int cbq_set_fopt(struct cbq_class
*cl
, struct tc_cbq_fopt
*fopt
)
1404 cbq_change_defmap(cl
, fopt
->split
, fopt
->defmap
, fopt
->defchange
);
1408 static int cbq_init(struct Qdisc
*sch
, struct rtattr
*opt
)
1410 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1411 struct rtattr
*tb
[TCA_CBQ_MAX
];
1412 struct tc_ratespec
*r
;
1414 if (rtattr_parse_nested(tb
, TCA_CBQ_MAX
, opt
) < 0 ||
1415 tb
[TCA_CBQ_RTAB
-1] == NULL
|| tb
[TCA_CBQ_RATE
-1] == NULL
||
1416 RTA_PAYLOAD(tb
[TCA_CBQ_RATE
-1]) < sizeof(struct tc_ratespec
))
1419 if (tb
[TCA_CBQ_LSSOPT
-1] &&
1420 RTA_PAYLOAD(tb
[TCA_CBQ_LSSOPT
-1]) < sizeof(struct tc_cbq_lssopt
))
1423 r
= RTA_DATA(tb
[TCA_CBQ_RATE
-1]);
1425 if ((q
->link
.R_tab
= qdisc_get_rtab(r
, tb
[TCA_CBQ_RTAB
-1])) == NULL
)
1429 q
->link
.sibling
= &q
->link
;
1430 q
->link
.classid
= sch
->handle
;
1431 q
->link
.qdisc
= sch
;
1432 if (!(q
->link
.q
= qdisc_create_dflt(sch
->dev
, &pfifo_qdisc_ops
,
1434 q
->link
.q
= &noop_qdisc
;
1436 q
->link
.priority
= TC_CBQ_MAXPRIO
-1;
1437 q
->link
.priority2
= TC_CBQ_MAXPRIO
-1;
1438 q
->link
.cpriority
= TC_CBQ_MAXPRIO
-1;
1439 q
->link
.ovl_strategy
= TC_CBQ_OVL_CLASSIC
;
1440 q
->link
.overlimit
= cbq_ovl_classic
;
1441 q
->link
.allot
= psched_mtu(sch
->dev
);
1442 q
->link
.quantum
= q
->link
.allot
;
1443 q
->link
.weight
= q
->link
.R_tab
->rate
.rate
;
1445 q
->link
.ewma_log
= TC_CBQ_DEF_EWMA
;
1446 q
->link
.avpkt
= q
->link
.allot
/2;
1447 q
->link
.minidle
= -0x7FFFFFFF;
1448 q
->link
.stats_lock
= &sch
->dev
->queue_lock
;
1450 qdisc_watchdog_init(&q
->watchdog
, sch
);
1451 hrtimer_init(&q
->delay_timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_ABS
);
1452 q
->delay_timer
.function
= cbq_undelay
;
1453 q
->toplevel
= TC_CBQ_MAXLEVEL
;
1454 PSCHED_GET_TIME(q
->now
);
1457 cbq_link_class(&q
->link
);
1459 if (tb
[TCA_CBQ_LSSOPT
-1])
1460 cbq_set_lss(&q
->link
, RTA_DATA(tb
[TCA_CBQ_LSSOPT
-1]));
1462 cbq_addprio(q
, &q
->link
);
1466 static __inline__
int cbq_dump_rate(struct sk_buff
*skb
, struct cbq_class
*cl
)
1468 unsigned char *b
= skb_tail_pointer(skb
);
1470 RTA_PUT(skb
, TCA_CBQ_RATE
, sizeof(cl
->R_tab
->rate
), &cl
->R_tab
->rate
);
1474 skb_trim(skb
, b
- skb
->data
);
1478 static __inline__
int cbq_dump_lss(struct sk_buff
*skb
, struct cbq_class
*cl
)
1480 unsigned char *b
= skb_tail_pointer(skb
);
1481 struct tc_cbq_lssopt opt
;
1484 if (cl
->borrow
== NULL
)
1485 opt
.flags
|= TCF_CBQ_LSS_BOUNDED
;
1486 if (cl
->share
== NULL
)
1487 opt
.flags
|= TCF_CBQ_LSS_ISOLATED
;
1488 opt
.ewma_log
= cl
->ewma_log
;
1489 opt
.level
= cl
->level
;
1490 opt
.avpkt
= cl
->avpkt
;
1491 opt
.maxidle
= cl
->maxidle
;
1492 opt
.minidle
= (u32
)(-cl
->minidle
);
1493 opt
.offtime
= cl
->offtime
;
1495 RTA_PUT(skb
, TCA_CBQ_LSSOPT
, sizeof(opt
), &opt
);
1499 skb_trim(skb
, b
- skb
->data
);
1503 static __inline__
int cbq_dump_wrr(struct sk_buff
*skb
, struct cbq_class
*cl
)
1505 unsigned char *b
= skb_tail_pointer(skb
);
1506 struct tc_cbq_wrropt opt
;
1509 opt
.allot
= cl
->allot
;
1510 opt
.priority
= cl
->priority
+1;
1511 opt
.cpriority
= cl
->cpriority
+1;
1512 opt
.weight
= cl
->weight
;
1513 RTA_PUT(skb
, TCA_CBQ_WRROPT
, sizeof(opt
), &opt
);
1517 skb_trim(skb
, b
- skb
->data
);
1521 static __inline__
int cbq_dump_ovl(struct sk_buff
*skb
, struct cbq_class
*cl
)
1523 unsigned char *b
= skb_tail_pointer(skb
);
1524 struct tc_cbq_ovl opt
;
1526 opt
.strategy
= cl
->ovl_strategy
;
1527 opt
.priority2
= cl
->priority2
+1;
1529 opt
.penalty
= cl
->penalty
;
1530 RTA_PUT(skb
, TCA_CBQ_OVL_STRATEGY
, sizeof(opt
), &opt
);
1534 skb_trim(skb
, b
- skb
->data
);
1538 static __inline__
int cbq_dump_fopt(struct sk_buff
*skb
, struct cbq_class
*cl
)
1540 unsigned char *b
= skb_tail_pointer(skb
);
1541 struct tc_cbq_fopt opt
;
1543 if (cl
->split
|| cl
->defmap
) {
1544 opt
.split
= cl
->split
? cl
->split
->classid
: 0;
1545 opt
.defmap
= cl
->defmap
;
1547 RTA_PUT(skb
, TCA_CBQ_FOPT
, sizeof(opt
), &opt
);
1552 skb_trim(skb
, b
- skb
->data
);
#ifdef CONFIG_NET_CLS_POLICE
/* Dump the policing mode of @cl as TCA_CBQ_POLICE.
 * Returns skb->len on success, -1 on lack of tailroom. */
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	/* FIX: zero the struct — the reserved fields of tc_cbq_police
	 * were never written and leaked kernel stack to userspace. */
	memset(&opt, 0, sizeof(opt));
	opt.police = cl->police;
	RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);	/* roll back the partial attribute */
	return -1;
}
#endif
/* Emit every per-class attribute for @cl into @skb, in the fixed order
 * LSS, RATE, WRR, OVL, (POLICE,) FOPT.  Returns 0 on success and -1 as
 * soon as one sub-dump fails (same short-circuit order as before). */
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0)
		return -1;
	if (cbq_dump_rate(skb, cl) < 0)
		return -1;
	if (cbq_dump_wrr(skb, cl) < 0)
		return -1;
	if (cbq_dump_ovl(skb, cl) < 0)
		return -1;
#ifdef CONFIG_NET_CLS_POLICE
	if (cbq_dump_police(skb, cl) < 0)
		return -1;
#endif
	if (cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
1590 static int cbq_dump(struct Qdisc
*sch
, struct sk_buff
*skb
)
1592 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1593 unsigned char *b
= skb_tail_pointer(skb
);
1596 rta
= (struct rtattr
*)b
;
1597 RTA_PUT(skb
, TCA_OPTIONS
, 0, NULL
);
1598 if (cbq_dump_attr(skb
, &q
->link
) < 0)
1599 goto rtattr_failure
;
1600 rta
->rta_len
= skb_tail_pointer(skb
) - b
;
1604 skb_trim(skb
, b
- skb
->data
);
1609 cbq_dump_stats(struct Qdisc
*sch
, struct gnet_dump
*d
)
1611 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1613 q
->link
.xstats
.avgidle
= q
->link
.avgidle
;
1614 return gnet_stats_copy_app(d
, &q
->link
.xstats
, sizeof(q
->link
.xstats
));
1618 cbq_dump_class(struct Qdisc
*sch
, unsigned long arg
,
1619 struct sk_buff
*skb
, struct tcmsg
*tcm
)
1621 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1622 unsigned char *b
= skb_tail_pointer(skb
);
1626 tcm
->tcm_parent
= cl
->tparent
->classid
;
1628 tcm
->tcm_parent
= TC_H_ROOT
;
1629 tcm
->tcm_handle
= cl
->classid
;
1630 tcm
->tcm_info
= cl
->q
->handle
;
1632 rta
= (struct rtattr
*)b
;
1633 RTA_PUT(skb
, TCA_OPTIONS
, 0, NULL
);
1634 if (cbq_dump_attr(skb
, cl
) < 0)
1635 goto rtattr_failure
;
1636 rta
->rta_len
= skb_tail_pointer(skb
) - b
;
1640 skb_trim(skb
, b
- skb
->data
);
1645 cbq_dump_class_stats(struct Qdisc
*sch
, unsigned long arg
,
1646 struct gnet_dump
*d
)
1648 struct cbq_sched_data
*q
= qdisc_priv(sch
);
1649 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1651 cl
->qstats
.qlen
= cl
->q
->q
.qlen
;
1652 cl
->xstats
.avgidle
= cl
->avgidle
;
1653 cl
->xstats
.undertime
= 0;
1655 if (!PSCHED_IS_PASTPERFECT(cl
->undertime
))
1656 cl
->xstats
.undertime
= PSCHED_TDIFF(cl
->undertime
, q
->now
);
1658 if (gnet_stats_copy_basic(d
, &cl
->bstats
) < 0 ||
1659 #ifdef CONFIG_NET_ESTIMATOR
1660 gnet_stats_copy_rate_est(d
, &cl
->rate_est
) < 0 ||
1662 gnet_stats_copy_queue(d
, &cl
->qstats
) < 0)
1665 return gnet_stats_copy_app(d
, &cl
->xstats
, sizeof(cl
->xstats
));
/*
 * ->graft(): attach a new child qdisc under class @arg, returning the
 * previous one through *old.
 * NOTE(review): the extraction dropped several lines here (the original
 * numbering jumps 1668->1671, 1671->1675, 1681->1685, 1686->1688): the
 * trailing parameter of the prototype (presumably struct Qdisc **old),
 * the guards around a NULL class / NULL new qdisc, sch_tree_lock(),
 * qdisc_reset() and the return statements are missing — verify against
 * the complete file before relying on this text.
 */
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
	struct cbq_class *cl = (struct cbq_class *)arg;
	/* default child: a pfifo classified under this class' own id */
	if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
			cl->classid)) == NULL)
#ifdef CONFIG_NET_CLS_POLICE
	/* reclassify-on-overlimit policing needs a reshape hook on the
	 * freshly grafted child */
	if (cl->police == TC_POLICE_RECLASSIFY)
		new->reshape_fail = cbq_reshape_fail;
	/* swap in the new qdisc and propagate the dropped backlog */
	*old = xchg(&cl->q, new);
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	sch_tree_unlock(sch);
1695 static struct Qdisc
*
1696 cbq_leaf(struct Qdisc
*sch
, unsigned long arg
)
1698 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1700 return cl
? cl
->q
: NULL
;
1703 static void cbq_qlen_notify(struct Qdisc
*sch
, unsigned long arg
)
1705 struct cbq_class
*cl
= (struct cbq_class
*)arg
;
1707 if (cl
->q
->q
.qlen
== 0)
1708 cbq_deactivate_class(cl
);
/* ->get(): look up a class by classid and return it as an opaque handle
 * (0 when not found, since cl is NULL then).
 * NOTE(review): the extraction dropped lines 1715-1717; upstream takes
 * a reference (cl->refcnt++) on success before returning — confirm
 * against the complete file. */
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);
	return (unsigned long)cl;
/* Unlink and release every filter attached to @cl.
 * NOTE(review): the loop body is truncated by the extraction — the
 * statement that actually destroys the detached tcf_proto (and the
 * closing braces) is missing; only the list-head unlink survives. */
static void cbq_destroy_filters(struct cbq_class *cl)
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		/* pop the head filter off the class' filter list */
		cl->filter_list = tp->next;
/* Release everything owned by @cl: its filters, its child qdisc, its
 * rate table and (when configured) its rate estimator.
 * NOTE(review): the tail of this function was dropped by the
 * extraction — upstream frees the class memory unless it is the
 * built-in link class, which is why 'q' is declared here although it
 * is otherwise unused in the visible text; confirm. */
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
	struct cbq_sched_data *q = qdisc_priv(sch);

	BUG_TRAP(!cl->filters);		/* caller must have unbound all filters */

	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
/* ->destroy(): tear down the whole qdisc — all filters first, then all
 * classes, walking the 16-bucket class hash twice.
 * NOTE(review): the extraction dropped the return-type line, the
 * declaration of the loop index 'h', the rx_class reset guarded by
 * CONFIG_NET_CLS_POLICE (and its #endif), the 'next = cl->next'
 * statement inside the second loop, and the closing braces — verify
 * against the complete file. */
cbq_destroy(struct Qdisc * sch)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;

#ifdef CONFIG_NET_CLS_POLICE
	/*
	 * Filters must be destroyed first because we don't destroy the
	 * classes from root to leafs which means that filters can still
	 * be bound to classes which have been destroyed already. --TGR '04
	 */
	for (h = 0; h < 16; h++)
		for (cl = q->classes[h]; cl; cl = cl->next)
			cbq_destroy_filters(cl);

	/* second pass: free the classes themselves, hash chain by chain;
	 * 'next' must be saved because cbq_destroy_class() frees cl */
	for (h = 0; h < 16; h++) {
		struct cbq_class *next;

		for (cl = q->classes[h]; cl; cl = next) {
			cbq_destroy_class(sch, cl);
/* ->put(): drop a reference taken by cbq_get(); when the last reference
 * goes away the class is destroyed.
 * NOTE(review): the extraction dropped the body of the locked
 * rx_class check (presumably q->rx_class = NULL), the matching #endif
 * and the closing braces — verify against the complete file. */
static void cbq_put(struct Qdisc *sch, unsigned long arg)
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		struct cbq_sched_data *q = qdisc_priv(sch);

		/* drop any cached rx_class pointer before the class dies;
		 * queue_lock serializes against the enqueue path */
		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
		spin_unlock_bh(&sch->dev->queue_lock);
		cbq_destroy_class(sch, cl);
/*
 * ->change(): create a new CBQ class or modify an existing one from the
 * netlink attributes in @tca.  On success the class handle is returned
 * through *arg.
 * NOTE(review): many original lines were dropped by the extraction
 * (every jump in the residual numbering below): the return-type line,
 * the trailing 'unsigned long *arg' parameter, all error returns and
 * 'goto failure' statements, the sch_tree_lock() calls, several #endif
 * lines and the closing braces.  Verify against the complete file.
 */
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	/* a nested TCA_OPTIONS attribute is mandatory */
	if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))

	/* reject any supplied attribute whose payload is too small */
	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))

	/* existing class: re-parenting is not supported */
	if (cl->tparent && cl->tparent->classid != parentid)
	if (!cl->tparent && parentid != TC_H_ROOT)

	if (tb[TCA_CBQ_RATE-1]) {
		rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);

	/* Change class parameters */

	/* take the class off the active round while re-parameterizing */
	if (cl->next_alive != NULL)
		cbq_deactivate_class(cl);

	/* swap in the new rate table, releasing the old one */
	rtab = xchg(&cl->R_tab, rtab);
	qdisc_put_rtab(rtab);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	if (tb[TCA_CBQ_WRROPT-1]) {
		cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));

	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));

	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

	cbq_activate_class(cl);

	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	/* replace the rate estimator if userspace supplied TCA_RATE */
	if (tca[TCA_RATE-1])
		gen_replace_estimator(&cl->bstats, &cl->rate_est,
			cl->stats_lock, tca[TCA_RATE-1]);

	/* ---- below here: creation of a brand new class ---- */

	if (parentid == TC_H_ROOT)

	/* WRR, RATE and LSS options are mandatory for a new class */
	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);

	/* explicit classid: must share this qdisc's major and be unused */
	if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))

	/* otherwise auto-generate a free minor in the 0x8000 range */
	classid = TC_H_MAKE(sch->handle,0x8000);

	for (i=0; i<0x8000; i++) {
		if (++q->hgenerator >= 0x8000)
		if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)

	classid = classid|q->hgenerator;

	parent = cbq_class_lookup(q, parentid);

	cl = kzalloc(sizeof(*cl), GFP_KERNEL);

	/* attach a default pfifo; fall back to noop_qdisc on failure */
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	/* inherit allot/quantum from the parent; weight defaults to the
	 * configured byte rate */
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats_lock = &sch->dev->queue_lock;

	/* link-sharing defaults: borrow from (and share with) the parent
	 * unless the parent is the root link class */
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	/* fall back to the link class' EWMA/idle/packet-size defaults */
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	cl->maxidle = q->link.maxidle;
	cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		gen_new_estimator(&cl->bstats, &cl->rate_est,
			cl->stats_lock, tca[TCA_RATE-1]);

	/* hand the new class back to the caller */
	*arg = (unsigned long)cl;

	/* error path: release a rate table acquired before failing */
	qdisc_put_rtab(rtab);
/* ->delete(): remove class @arg from the qdisc.
 * NOTE(review): the extraction dropped the error returns, the
 * declaration of 'qlen', sch_tree_lock(), qdisc_reset(cl->q), the
 * q->tx_class reset, the rx_class handling body under
 * CONFIG_NET_CLS_POLICE (and its #endif) and the final return —
 * verify against the complete file. */
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	/* refuse while filters or children still reference the class,
	 * and never delete the built-in root (link) class */
	if (cl->filters || cl->children || cl == &q->link)

	/* flush the child qdisc and propagate the qlen drop upwards */
	qlen = cl->q->q.qlen;
	qdisc_tree_decrease_qlen(cl->q, qlen);

	cbq_deactivate_class(cl);

	/* forget any transmit-state pointers into this class */
	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_borrowed = NULL;
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)

	/* unhash, fix ancestor levels, and repair split-node defmaps
	 * that may have pointed at this class */
	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cbq_sync_defmap(cl);

	sch_tree_unlock(sch);

	/* destroy immediately unless someone still holds a reference */
	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);
/* ->tcf_chain(): return the anchor of the filter list for class @arg.
 * NOTE(review): the extraction dropped lines 2038-2041; upstream falls
 * back to the root link class when no class handle is given, which is
 * why 'q' is declared here although otherwise unused — confirm. */
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	return &cl->filter_list;
/* ->bind_tcf(): a filter on class @parent wants to point at class
 * @classid; return the target class handle, or 0 to refuse.
 * NOTE(review): the extraction dropped the continuation of the
 * parameter list (the u32 classid parameter), the NULL check around
 * 'cl' and the filter-reference increment (cl->filters++) — verify
 * against the complete file. */
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	/* a filter on p may only direct traffic deeper into the tree */
	if (p && p->level <= cl->level)

	return (unsigned long)cl;
/* ->unbind_tcf(): a filter bound to class @arg is going away.
 * NOTE(review): the function body was dropped by the extraction —
 * upstream decrements the class' filter reference count here
 * (cl->filters--); confirm against the complete file. */
static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
	struct cbq_class *cl = (struct cbq_class *)arg;
/* ->walk(): invoke arg->fn on every class, honouring the walker's
 * skip/count window.
 * NOTE(review): the extraction dropped the declaration of 'h', the
 * arg->stop early-exit check, the arg->count bookkeeping inside both
 * branches, the arg->stop = 1 on callback failure and the closing
 * braces — verify against the complete file. */
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
	struct cbq_sched_data *q = qdisc_priv(sch);

	for (h = 0; h < 16; h++) {	/* 16-bucket class hash table */
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* classes below the skip window are counted, not visited */
			if (arg->count < arg->skip) {
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
/* Class-level operations exposed to the tc class machinery.
 * NOTE(review): several members visible upstream (.graft, .leaf, .get,
 * .put, .walk) were dropped by the extraction — verify against the
 * complete file. */
static struct Qdisc_class_ops cbq_class_ops = {
	.qlen_notify	=	cbq_qlen_notify,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
	.dump_stats	=	cbq_dump_class_stats,
/* Top-level qdisc operations registered with the packet scheduler.
 * NOTE(review): several members visible upstream (.next, .id, .drop,
 * .init, .reset, .change, .dump) were dropped by the extraction —
 * verify against the complete file. */
static struct Qdisc_ops cbq_qdisc_ops = {
	.cl_ops		=	&cbq_class_ops,
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.destroy	=	cbq_destroy,
	.dump_stats	=	cbq_dump_stats,
	.owner		=	THIS_MODULE,
2127 static int __init
cbq_module_init(void)
2129 return register_qdisc(&cbq_qdisc_ops
);
2131 static void __exit
cbq_module_exit(void)
2133 unregister_qdisc(&cbq_qdisc_ops
);
/* Standard module plumbing: register the CBQ qdisc on load and
 * unregister it on unload. */
module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");