/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. Particularly:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than the MTU, which is always the case when real-time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if the class's
	per-round allotment is less than the MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if it is correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so that we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because of
	internal device queueing, but on a permanently loaded link it is true.
	Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
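
/* Illustrative example (not part of the original source): take MTU = 1500
   bytes, a WRR round of W = 3000 bytes, and k = 2 real-time classes with
   r_1 = r_2 = 0.1, so r = 0.2.  Class 1's per-round allotment W*r_1 = 300
   bytes is well below the MTU, yet the bound above still applies:

	delay_1 <= ([1500/300]*3000*0.2 + 3000*0.2 + 2*1500)/B
		 = (3000 + 600 + 3000)/B = 6600 bytes / B,

   i.e. about 5.3 ms on a 10 Mbit/s link.  This matches the IntServ form
   delay <= C/(r_1*B) + D/B with C = MTU*r = 300 and D = W*r + k*MTU = 3600. */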
struct cbq_sched_data;
struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */

	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);

	/* General scheduler (WRR) parameters */
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */

	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes. */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			deficit;	/* Saved deficit for WRR */
	unsigned long		penalized;
	struct tc_stats		stats;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};
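
/* Illustrative note (not part of the original source): for a hierarchy
   root -> agency -> leaf, the leaf's tparent points to the agency class,
   sibling links it into the circular list of the agency's children, and
   children points to its own child list (NULL for leaves).  borrow and
   share normally also point to tparent; they are forced to NULL when the
   class is configured "bounded" (may not borrow) or "isolated" (does not
   share), respectively -- see cbq_set_lss() below. */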
struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;

	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */

	struct timer_list	delay_timer;
	struct timer_list	wd_timer;		/* Watchdog timer,
							   started when CBQ has
							   backlog, but cannot
							   transmit just now */
};
#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
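
/* Illustrative note (not part of the original source): L2T ("length to
   time") looks up the transmission time of a len-byte packet in the class's
   rate table.  The table holds one precomputed time value per "cell" of
   2^cell_log bytes, so e.g. with cell_log = 3 a 1500-byte packet indexes
   data[1500>>3] = data[187]. */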
static __inline__ unsigned cbq_hash(u32 h)
static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
			return cl;
	/* ... */
}
#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;
	/* ... */
}
#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (e.g. route based) at the root of
   CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;
	/* ... */

		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
			/* ... */

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				/* ... */
		}
		/* ... */

#ifdef CONFIG_NET_CLS_POLICE
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
#endif
		/* ... */

		/*
		 * Step 3+n. If the classifier selected a link-sharing class,
		 *	 apply the agency-specific classifier.
		 *	 Repeat this procedure until we hit a leaf node.
		 */
	/* ... */

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		/* ... */
	/* ... */
}
/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of active class list
   of its priority band.
 */
static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}
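
/* Illustrative note (not part of the original source): q->active[prio] is a
   circular singly linked list (via next_alive) of the backlogged classes in
   one priority band, and q->activemask keeps one bit per non-empty band so
   the dequeue path can skip empty bands cheaply.  E.g. with backlog only in
   bands 0 and 2, activemask == 0x05 and ffz(~activemask) picks band 0 first. */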
/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */
static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					/* ... */
				}
			}

			cl = cl_prev->next_alive;
			/* ... */
		}
	} while ((cl_prev = cl) != q->active[prio]);
}
static void
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		PSCHED_GET_TIME(now);
		incr = PSCHED_TDIFF(now, q->now_rt);
		PSCHED_TADD2(q->now, incr, now);

		do {
			if (PSCHED_TLESS(cl->undertime, now)) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
	}
}
static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = cbq_classify(skb, sch);
	int len = skb->len;
	int ret = NET_XMIT_POLICED;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
#endif
	if (cl) {
#ifdef CONFIG_NET_CLS_POLICE
		cl->q->__parent = sch;
#endif
		if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
			sch->stats.packets++;
			sch->stats.bytes += len;
			cbq_mark_toplevel(q, cl);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
	}

	/* ... */
	cbq_mark_toplevel(q, cl);
	/* ... */
}
static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		/* ... */
	}
	/* ... */

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
	/* ... */
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		/* ... */
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	/* ... */
}
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   Class goes to sleep, so that it will have no
		   chance to work avgidle. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this
		   place, apparently they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		/* ... */
		PSCHED_TADD2(q->now, delay, cl->undertime);

		/* ... */
		cl->xstats.overactions++;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on the
	   real available rate, rather than the leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = PSCHED_TDIFF(b->undertime, q->now);
			if (delay < base_delay) {
				/* ... */
			}
		}
		/* ... */
		q->wd_expires = delay;
	}
}
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
   ...
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			/* ... */
		}
	} while ((cl = cl->borrow) != NULL);
	/* ... */
}
/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		unsigned long sched = jiffies;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		/* ... */
		sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
		cl->penalized = sched;
		cl->cpriority = TC_CBQ_MAXPRIO;
		q->pmask |= (1<<TC_CBQ_MAXPRIO);
		if (del_timer(&q->delay_timer) &&
		    (long)(q->delay_timer.expires - sched) > 0)
			q->delay_timer.expires = sched;
		add_timer(&q->delay_timer);

		cl->xstats.overactions++;
		/* ... */
	}

	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}
/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;

	cl->penalized = jiffies + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	/* ... */
}
/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			/* ... */

	cl->xstats.overactions++;
	/* ... */
}
static void cbq_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	qdisc_wakeup(sch->dev);
}
static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	unsigned long now = jiffies;
	unsigned long sched = now;

	/* ... */

	do {
		cl = cl_prev->next_alive;
		if ((long)(now - cl->penalized) > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			/* ... */
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					/* ... */
				}
			}

			cl = cl_prev->next_alive;
		} else if ((long)(sched - cl->penalized) > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return (long)(sched - now);
}
static void cbq_undelay(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	long delay = 0;
	unsigned pmask;
	/* ... */

	while (pmask) {
		int prio = ffz(~pmask);
		long tmp;
		/* ... */

		tmp = cbq_undelay_prio(q, prio);
		if (tmp < delay || delay == 0)
			delay = tmp;
	}

	if (delay) {
		q->delay_timer.expires = jiffies + delay;
		add_timer(&q->delay_timer);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	qdisc_wakeup(sch->dev);
}
#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = q->rx_class;
	/* ... */

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
		/* ... */
		cbq_mark_toplevel(q, cl);
		/* ... */
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			/* ... */
			sch->stats.packets++;
			sch->stats.bytes += len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			/* ... */
		}
		/* ... */
	}
	/* ... */
}
#endif
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. This is not part of the
   original CBQ description, but looks more reasonable.
   Probably it is wrong. This question needs further investigation.
*/
static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed = borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}
static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;
	/* ... */

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		/* ... */
		cl->stats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

			idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

			/* true_avgidle := (1-W)*true_avgidle + W*idle,
			   where W = 2^{-ewma_log}. But cl->avgidle is scaled:
			   cl->avgidle == true_avgidle/W,
			   hence:
			 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send.
			   It will occur when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   To maintain the rate allocated to the class,
			   we add to the undertime virtual clock
			   the time necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already been passed
			   to the moment of cbq_update.)
			 */
			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			PSCHED_AUDIT_TDIFF(idle);

			PSCHED_TADD2(q->now, idle, cl->undertime);
		} else {
			/* Underlimit */

			/* ... */
			PSCHED_SET_PASTPERFECT(cl->undertime);
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		/* ... */
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}
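
/* Illustrative example (not part of the original source): with ewma_log = 3,
   W = 2^-3 = 1/8 and the scaled update is avgidle += idle - (avgidle>>3).
   Starting from avgidle = 0 with a constant measured idle of -1000 (an
   always-busy class), successive updates give -1000, then -1875, then about
   -2640, ..., approaching idle/W = -8000; the corresponding
   true_avgidle = W*avgidle approaches the per-packet idle of -1000, which is
   exactly the EWMA true_avgidle := (1-W)*true_avgidle + W*idle above. */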
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {
		/* ... */
		return cl;
	}

	do {
		/* This is a very suspicious place. Currently an overlimit
		   action is generated for non-bounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. Particularly,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and, however,
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->stats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));

	/* ... */
	return cl;
}
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	/* ... */

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		struct cbq_class *borrow = NULL;

		if (cl->q->q.qlen &&
		    (borrow = cbq_under_limit(cl)) == NULL)
			/* ... */

		if (cl->deficit <= 0) {
			/* Class exhausted its allotment for
			   this round. Switch to the next one.
			 */
			/* ... */
			cl->deficit += cl->quantum;
			/* ... */
		}

		skb = cl->q->dequeue(cl->q);

		/* Class did not give us any skb :-(
		   It can occur even if cl->q->q.qlen != 0,
		   e.g. if cl->q == "tbf".
		 */
		/* ... */

		cl->deficit -= skb->len;
		/* ... */
		q->tx_borrowed = borrow;
		/* ... */
#ifndef CBQ_XSTATS_BORROWS_BYTES
		borrow->xstats.borrows++;
		cl->xstats.borrows++;
#else
		borrow->xstats.borrows += skb->len;
		cl->xstats.borrows += skb->len;
#endif
		/* ... */
		q->tx_len = skb->len;

		if (cl->deficit <= 0) {
			q->active[prio] = cl;
			/* ... */
			cl->deficit += cl->quantum;
		}
		/* ... */

		if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
			/* Class is empty or penalized.
			   Unlink it from active chain.
			 */
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			/* Did cl_tail point to it? */
			/* ... */

			/* Was it the last class in this band? */
			/* ... */
				q->active[prio] = NULL;
				q->activemask &= ~(1<<prio);
				/* ... */
				cbq_activate_class(cl);
				/* ... */

			q->active[prio] = cl_tail;
			/* ... */
			cbq_activate_class(cl);
			/* ... */
		}
		/* ... */
	} while (cl_prev != cl_tail);
	/* ... */

	q->active[prio] = cl_prev;
	/* ... */
}
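
/* Illustrative note (not part of the original source): classes within one
   priority band share the band deficit-round-robin style.  A class "pays"
   skb->len from its deficit for every packet it sends and is topped up by
   its quantum when its allotment is exhausted, so over time each backlogged
   class gets bandwidth roughly proportional to its quantum (and hence to
   its weight, see cbq_normalize_quanta() below). */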
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	psched_time_t now;
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate the EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		/* ... */
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	PSCHED_TADD(q->now, incr);
	/* ... */

		skb = cbq_dequeue_1(sch);
		if (skb) {
			/* ... */
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   It is possible if:
		   1. Scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. Root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; this is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    PSCHED_IS_PASTPERFECT(q->link.undertime))
			/* ... */

		q->toplevel = TC_CBQ_MAXLEVEL;
		PSCHED_SET_PASTPERFECT(q->link.undertime);
	/* ... */

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start the watchdog timer in the latter case. */

	/* ... */
		sch->stats.overlimits++;
		if (q->wd_expires && !sch->dev->tbusy) {
			long delay = PSCHED_US2JIFFIE(q->wd_expires);
			del_timer(&q->wd_timer);
			/* ... */
			q->wd_timer.expires = jiffies + delay;
			add_timer(&q->wd_timer);
			sch->flags |= TCQ_F_THROTTLED;
		}
	/* ... */
}
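
/* Illustrative example (not part of the original source): suppose the last
   packet handed to the driver was 1500 bytes on a 1.5 Mbit/s link, so
   L2T(&q->link, 1500) is about 8 ms.  If the next dequeue happens only 5 ms
   of real time later, the artificial clock q->now is advanced by the full
   8 ms of "work" (the 5 ms real-time increment is absorbed); if it happens
   20 ms later, q->now follows real time instead -- which is the
   cbq_time = max(real_time, work) rule quoted above. */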
/* CBQ class maintenance routines */
static void cbq_adjust_levels(struct cbq_class *this)
{
	/* ... */
	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	int h;

	if (q->quanta[prio] == 0)
		return;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum > 32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
			}
		}
	}
}
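
/* Illustrative example (not part of the original source): q->quanta[prio]
   is the sum of the weights in that band (see cbq_addprio() below), so with
   two classes of weights 3 and 1 and allot = 1500 the quanta become
   3*1500*2/4 = 2250 and 1*1500*2/4 = 750 bytes -- proportional to the
   weights while averaging to the allotment, which is what the WRR round in
   cbq_dequeue_prio() expects. */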
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i = 0; i <= TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h = 0; h < 16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}
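
/* Illustrative note (not part of the original source): cl->defmap is a bit
   mask over the TC_PRIO_* values; bit i set means this class volunteers to
   be the default target for priority i at its split node.  E.g.
   defmap = 0x06 covers priorities 1 and 2, and cbq_sync_defmap() then
   installs the lowest-level matching class into split->defaults[1] and
   split->defaults[2], which is what cbq_classify() falls back on. */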
static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->classid;
	}

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)
				break;
	}
	/* ... */

	if (cl->split != split) {
		/* ... */
		cbq_sync_defmap(cl);
		/* ... */
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}
static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
		/* ... */
	}

	if (this->tparent) {
		/* ... */
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else
		BUG_TRAP(this->sibling == this);
}
static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)this->qdisc->data;
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;
	/* ... */

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}
static int cbq_drop(struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl, *cl_head;
	int prio;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && cl->q->ops->drop(cl->q))
				return 1;
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}
static void
cbq_reset(struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	int prio;
	unsigned h;

	/* ... */
	q->tx_borrowed = NULL;
	del_timer(&q->wd_timer);
	del_timer(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	/* ... */

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* ... */
			cl->next_alive = NULL;
			PSCHED_SET_PASTPERFECT(cl->undertime);
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	/* ... */
}
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}
static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}
static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}
static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = (ovl->penalty*HZ)/1000;
	return 0;
}
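
/* Illustrative note (not part of the original source): ovl->penalty is
   apparently supplied by userspace in milliseconds and converted to jiffies
   here, e.g. 300 ms with HZ == 100 becomes cl->penalty = 30 ticks, which
   cbq_ovl_delay()/cbq_ovl_lowprio() later add to jiffies to compute
   cl->penalized. */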
#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif
static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}
static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	/* ... */
	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL) {
		/* ... */
	}

	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;
	q->link.stats.lock = &sch->dev->queue_lock;

	init_timer(&q->wd_timer);
	q->wd_timer.data = (unsigned long)sch;
	q->wd_timer.function = cbq_watchdog;
	init_timer(&q->delay_timer);
	q->delay_timer.data = (unsigned long)sch;
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	/* ... */

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
	return 0;
}
#ifdef CONFIG_RTNETLINK

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	/* ... */
	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_wrropt opt;

	/* ... */
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.penalty = (cl->penalty*1000)/HZ;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		/* ... */
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_police opt;

	if (cl->police) {
		opt.police = cl->police;
		RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#endif
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}
int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
{
	RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
	return 0;

rtattr_failure:
	return -1;
}
static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	spin_lock_bh(&sch->dev->queue_lock);
	q->link.xstats.avgidle = q->link.avgidle;
	if (cbq_copy_xstats(skb, &q->link.xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	}
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr *)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	cl->stats.qlen = cl->q->q.qlen;
	if (qdisc_copy_stats(skb, &cl->stats))
		goto rtattr_failure;
	spin_lock_bh(&sch->dev->queue_lock);
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;
	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
	q->link.xstats.avgidle = q->link.avgidle;
	if (cbq_copy_xstats(skb, &cl->xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	}
	spin_unlock_bh(&sch->dev->queue_lock);

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

#endif
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl) {
		if (new == NULL) {
			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
				return -ENOBUFS;
#ifdef CONFIG_NET_CLS_POLICE
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		/* ... */
		sch_tree_unlock(sch);
		/* ... */
	}
	/* ... */
}
static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	return cl ? cl->q : NULL;
}
static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_destroy_filters(struct cbq_class *cl)
{
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		cl->filter_list = tp->next;
		tp->ops->destroy(tp);
	}
}
static void cbq_destroy_class(struct cbq_class *cl)
{
	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	qdisc_kill_estimator(&cl->stats);
#endif
	/* ... */
}
static void
cbq_destroy(struct Qdisc *sch)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = NULL;
#endif
	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next)
			cbq_destroy_filters(cl);
	}

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next)
			/* ... */
			cbq_destroy_class(cl);
	}

	qdisc_put_rtab(q->link.R_tab);
	/* ... */
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(&sch->dev->queue_lock);
#endif
		cbq_destroy_class(cl);
	}
}
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
		 unsigned long *arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL ||
	    rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
		return -EINVAL;

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
		return -EINVAL;

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
		return -EINVAL;

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
		return -EINVAL;

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
		return -EINVAL;
#endif

	if (cl) {
		/* Check parent */
		if (cl->tparent && cl->tparent->classid != parentid)
			return -EINVAL;
		if (!cl->tparent && parentid != TC_H_ROOT)
			return -EINVAL;

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			/* ... */
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1]) {
			qdisc_kill_estimator(&cl->stats);
			qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
		}
#endif
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	/* ... */

	if (classid) {
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			/* ... */
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i = 0; i < 0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		/* ... */
		classid = classid|q->hgenerator;
	}

	parent = cbq_class_lookup(q, parentid);
	/* ... */

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	memset(cl, 0, sizeof(*cl));
	/* ... */

	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	/* ... */
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats.lock = &sch->dev->queue_lock;

	sch_tree_lock(sch);
	/* ... */
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle == 0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt == 0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
#endif

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return -EINVAL;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	/* ... */
	cbq_sync_defmap(cl);
	/* ... */

	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(cl);

	return 0;
}
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	struct cbq_class *p = (struct cbq_class *)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}
static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class *)arg;

	cl->filters--;
}
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
static struct Qdisc_class_ops cbq_class_ops =
{
	/* ... */
#ifdef CONFIG_RTNETLINK
	/* ... */
#endif
};

struct Qdisc_ops cbq_qdisc_ops =
{
	/* ... */
	sizeof(struct cbq_sched_data),
	/* ... */
	NULL /* cbq_change */,
	/* ... */
#ifdef CONFIG_RTNETLINK
	/* ... */
#endif
};
int init_module(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}

void cleanup_module(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}