/*
 * net/sched/sch_cbq.c  Class-Based Queueing discipline.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		     Management Models for Packet Networks",
		     IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		     Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		     for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. In particular:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real time classes
	have small rates. Note that the statement in [3] is
	incomplete: delay may actually be estimated even if the class
	per-round allotment is less than MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1, then

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong
	because of internal device queueing, but on a permanently loaded
	link it is true. Moreover, combined with the clock integrator,
	this scheme looks very close to an ideal solution. */
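/*	Illustration (user-space, not part of the qdisc): plugging example
	numbers into the bound quoted above,

		delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	The shares, W and B below are invented purely for the example.

		#include <math.h>
		#include <stdio.h>

		int main(void)
		{
			double B = 125000.0;            // link bandwidth, bytes/sec
			double MTU = 1500.0;            // bytes
			double W = 3000.0;              // round size, bytes
			double r[] = { 0.05, 0.10, 0.25 };  // class shares, sum < 1
			int k = sizeof(r)/sizeof(r[0]), i;
			double rsum = 0.0;

			for (i = 0; i < k; i++)
				rsum += r[i];
			for (i = 0; i < k; i++) {
				double allot = W * r[i];        // may be < MTU
				double bound = (ceil(MTU/allot)*W*rsum
						+ W*rsum + k*MTU) / B;
				printf("class %d: allot %.0fB, delay <= %.3fs\n",
				       i, allot, bound);
			}
			return 0;
		}
 */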
struct cbq_sched_data;
struct cbq_class
{
	struct cbq_class	*next;		/* hash table link */
	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */

/* Parameters */
	u32			classid;
	unsigned char		priority;	/* class priority */
	unsigned char		priority2;	/* priority to be used after overlimit */
	unsigned char		ewma_log;	/* time constant for idle time calculation */
	unsigned char		ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char		police;
#endif

	u32			defmap;

	/* Link-sharing scheduler parameters */
	long			maxidle;	/* Class parameters: see below. */
	long			offtime;
	long			minidle;
	u32			avpkt;
	struct qdisc_rate_table	*R_tab;

	/* Overlimit strategy parameters */
	void			(*overlimit)(struct cbq_class *cl);
	long			penalty;

	/* General scheduler (WRR) parameters */
	long			allot;
	long			quantum;	/* Allotment per WRR round */
	long			weight;		/* Relative allotment: see below */

	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class	*split;		/* Ptr to split node */
	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
						   parent otherwise */
	struct cbq_class	*sibling;	/* Sibling chain */
	struct cbq_class	*children;	/* Pointer to children chain */

	struct Qdisc		*q;		/* Elementary queueing discipline */

/* Variables */
	unsigned char		cpriority;	/* Effective priority */
	unsigned char		delayed;
	unsigned char		level;		/* level of the class in hierarchy:
						   0 for leaf classes, and maximal
						   level of children + 1 for nodes.
						 */

	psched_time_t		last;		/* Last end of service */
	psched_time_t		undertime;
	long			avgidle;
	long			deficit;	/* Saved deficit for WRR */
	unsigned long		penalized;
	struct tc_stats		stats;
	spinlock_t		*stats_lock;
	struct tc_cbq_xstats	xstats;

	struct tcf_proto	*filter_list;

	int			refcnt;
	int			filters;

	struct cbq_class	*defaults[TC_PRIO_MAX+1];
};

struct cbq_sched_data
{
	struct cbq_class	*classes[16];		/* Hash table of all classes */
	int			nclasses[TC_CBQ_MAXPRIO+1];
	unsigned		quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class	link;

	unsigned		activemask;
	struct cbq_class	*active[TC_CBQ_MAXPRIO+1];	/* List of all classes
								   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class	*rx_class;
#endif
	struct cbq_class	*tx_class;
	struct cbq_class	*tx_borrowed;
	int			tx_len;
	psched_time_t		now;			/* Cached timestamp */
	psched_time_t		now_rt;			/* Cached real time */
	unsigned		pmask;

	struct timer_list	delay_timer;
	struct timer_list	wd_timer;		/* Watchdog timer,
							   started when CBQ has
							   backlog, but cannot
							   transmit just now */
	long			wd_expires;
	int			toplevel;
	u32			hgenerator;
};

#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
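/*	Illustration (user-space): the L2T() lookup above maps a packet length,
	right-shifted by rate.cell_log, into a precomputed per-cell transmission
	time. A toy stand-alone version, with an invented 1 Mbit/s rate and an
	8-entry table:

		#include <stdio.h>

		#define CELL_LOG 8                      // one cell = 256 bytes
		#define CELLS    8

		static unsigned long tx_time[CELLS];    // usec to send (i+1) cells

		static unsigned long l2t(unsigned len)
		{
			unsigned idx = len >> CELL_LOG;
			return tx_time[idx < CELLS ? idx : CELLS - 1];
		}

		int main(void)
		{
			unsigned long bytes_per_sec = 125000;   // 1 Mbit/s
			int i;

			for (i = 0; i < CELLS; i++)
				tx_time[i] = (unsigned long)((i + 1) << CELL_LOG)
					     * 1000000 / bytes_per_sec;
			printf("64B -> %luus, 1500B -> %luus\n", l2t(64), l2t(1500));
			return 0;
		}
 */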
static __inline__ unsigned cbq_hash(u32 h)
{
	h ^= h>>8;
	h ^= h>>4;
	return h&0xF;
}

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)
			return cl;
	return NULL;
}
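/*	Illustration (user-space): the 16-bucket class table above is indexed
	by folding the 32-bit classid down to 4 bits. A stand-alone fold of
	that shape, with a few invented classids:

		#include <stdio.h>

		static unsigned fold16(unsigned h)
		{
			h ^= h >> 8;
			h ^= h >> 4;
			return h & 0xF;
		}

		int main(void)
		{
			unsigned ids[] = { 0x00010001, 0x00010002, 0x00010010 };
			int i;

			for (i = 0; i < 3; i++)
				printf("%08x -> bucket %u\n", ids[i], fold16(ids[i]));
			return 0;
		}
 */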
#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
			return new;

	return NULL;
}

#endif
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link sharing rules (e.g. route based) at the root of
   CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached
   to the split node.
 */
static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)
		return cl;

#ifdef CONFIG_NET_CLS_ACT
	/* ... */
#endif
	for (;;) {
		int result = 0;

		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
			goto fallback;

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)
				goto fallback;
		}

#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_SHOT: /* Stop and kfree */
			*qres = NET_XMIT_DROP;
			kfree_skb(skb);
			return NULL;
		case TC_ACT_RECLASSIFY: /* Things look good */
		default:
			break;
		}
#endif
#ifdef CONFIG_NET_CLS_POLICE
		switch (result) {
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);
		default:
			break;
		}
#endif
		if (cl->level == 0)
			return cl;

		/*
		 * Step 3+n. If classifier selected a link sharing class,
		 *	     apply agency specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */
		head = cl;
	}

fallback:
	cl = head;

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
		return head;

	return cl;
}

/*
   A packet has just been enqueued on the empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;
	} else {
		cl->next_alive = cl;
		q->activemask |= (1<<prio);
	}
}

/*
   Unlink class from active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

	do {
		cl = cl_prev->next_alive;
		if (cl == this) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);
					return;
				}
			}

			cl = cl_prev->next_alive;
			return;
		}
	} while ((cl_prev = cl) != q->active[prio]);
}

static void cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
{
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		psched_time_t now;
		psched_tdiff_t incr;

		PSCHED_GET_TIME(now);
		incr = PSCHED_TDIFF(now, q->now_rt);
		PSCHED_TADD2(q->now, incr, now);

		do {
			if (PSCHED_TLESS(cl->undertime, now)) {
				q->toplevel = cl->level;
				return;
			}
		} while ((cl=cl->borrow) != NULL && toplevel > cl->level);
	}
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	int len = skb->len;
	int ret = NET_XMIT_SUCCESS;
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
#endif
	if (cl) {
#ifdef CONFIG_NET_CLS_POLICE
		cl->q->__parent = sch;
#endif
		if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
			sch->q.qlen++;
			sch->stats.packets++;
			sch->stats.bytes += len;
			cbq_mark_toplevel(q, cl);
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return ret;
		}
	}

#ifndef CONFIG_NET_CLS_ACT
	sch->stats.drops++;
	if (cl == NULL)
		kfree_skb(skb);
	else {
		cbq_mark_toplevel(q, cl);
		cl->stats.drops++;
	}
#else
	if (NET_XMIT_DROP == ret) {
		sch->stats.drops++;
	}

	if (cl != NULL) {
		cbq_mark_toplevel(q, cl);
		cl->stats.drops++;
	}
#endif
	return ret;
}

static int
cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int ret;

	if ((cl = q->tx_class) == NULL) {
		kfree_skb(skb);
		sch->stats.drops++;
		return NET_XMIT_CN;
	}
	q->tx_class = NULL;

	cbq_mark_toplevel(q, cl);

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = cl;
	cl->q->__parent = sch;
#endif
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
		sch->q.qlen++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
		return 0;
	}
	sch->stats.drops++;
	cl->stats.drops++;
	return ret;
}

/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		delay += cl->offtime;

		/*
		   The class goes to sleep, so that it will have no
		   chance to work avgidle off. Let's forgive it 8)

		   BTW cbq-2.0 has a bug in this place; apparently
		   they forgot to shift it by cl->ewma_log.
		 */
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		if (delay <= 0)
			delay = 1;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		cl->xstats.overactions++;
		cl->delayed = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on
	   the real available rate, rather than the leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		struct cbq_class *b;
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = PSCHED_TDIFF(b->undertime, q->now);
			if (delay < base_delay) {
				if (delay <= 0)
					delay = 1;
				base_delay = delay;
			}
		}

		q->wd_expires = base_delay;
	}
}

/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in the hierarchy, when
   they go overlimit
 */

static void cbq_ovl_rclassic(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

	do {
		if (cl->level > q->toplevel) {
			cl = NULL;
			break;
		}
	} while ((cl = cl->borrow) != NULL);

	if (cl == NULL)
		cl = this;
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

	if (!cl->delayed) {
		unsigned long sched = jiffies;

		delay += cl->offtime;
		if (cl->avgidle < 0)
			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

		if (delay > 0) {
			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);
			if (del_timer(&q->delay_timer) &&
			    (long)(q->delay_timer.expires - sched) > 0)
				q->delay_timer.expires = sched;
			add_timer(&q->delay_timer);
			cl->delayed = 1;
			cl->xstats.overactions++;
			return;
		}
		delay = 1;
	}
	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;
}

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = jiffies + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;
	}
	cbq_ovl_classic(cl);
}

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
{
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
			cl->qdisc->q.qlen--;
	cl->xstats.overactions++;
	cbq_ovl_classic(cl);
}

static void cbq_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc*)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}

static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	unsigned long now = jiffies;
	unsigned long sched = now;

	if (cl_prev == NULL)
		return now;

	do {
		cl = cl_prev->next_alive;
		if ((long)(now - cl->penalized) > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
			cl->delayed = 0;
			cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					return 0;
				}
			}

			cl = cl_prev->next_alive;
		} else if ((long)(sched - cl->penalized) > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return (long)(sched - now);
}

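/*	Illustration (user-space): the (long)(now - cl->penalized) > 0 test
	above is the usual jiffies idiom: subtract in unsigned arithmetic and
	look at the sign of the result, which stays correct across counter
	wraparound. Toy values simulating a 32-bit counter:

		#include <stdio.h>

		int main(void)
		{
			unsigned int now = 0x00000005u;         // wrapped recently
			unsigned int deadline = 0xFFFFFFF0u;    // set before the wrap
			int diff = (int)(now - deadline);       // == 0x15, positive

			printf("expired: %s\n", diff > 0 ? "yes" : "no");   // yes
			return 0;
		}
 */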
static void cbq_undelay(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc*)arg;
	struct cbq_sched_data *q = qdisc_priv(sch);
	long delay = 0;
	unsigned pmask;

	pmask = q->pmask;
	q->pmask = 0;

	while (pmask) {
		int prio = ffz(~pmask);
		long tmp;

		pmask &= ~(1<<prio);

		tmp = cbq_undelay_prio(q, prio);
		if (tmp > 0) {
			q->pmask |= 1<<prio;
			if (tmp < delay || delay == 0)
				delay = tmp;
		}
	}

	if (delay) {
		q->delay_timer.expires = jiffies + delay;
		add_timer(&q->delay_timer);
	}

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}

#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	int len = skb->len;
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	q->rx_class = NULL;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		q->rx_class = cl;
		cl->q->__parent = sch;

		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->q.qlen++;
			sch->stats.packets++;
			sch->stats.bytes += len;
			if (!cl->next_alive)
				cbq_activate_class(cl);
			return 0;
		}
		sch->stats.drops++;
		return 0;
	}

	sch->stats.drops++;
	return -1;
}
#endif

/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. It is not part of the
   original CBQ description, but looks more reasonable.
   Probably, it is wrong. This question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
{
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
			do {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
					return;
				}
			} while ((borrowed=borrowed->borrow) != NULL);
		}
#if 0
	/* It is not necessary now. Uncommenting it
	   will save CPU cycles, but decrease fairness.
	 */
		q->toplevel = TC_CBQ_MAXLEVEL;
#endif
	}
}

static void
cbq_update(struct cbq_sched_data *q)
{
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;
	int len = q->tx_len;

	q->tx_class = NULL;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;
		long idle;

		cl->stats.packets++;
		cl->stats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

			idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
		} else {
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		   hence:
		 */
			avgidle += idle - (avgidle>>cl->ewma_log);
		}

		if (avgidle <= 0) {
			/* Overlimit or at-limit */

			if (avgidle < cl->minidle)
				avgidle = cl->minidle;

			cl->avgidle = avgidle;

			/* Calculate the expected time when this class
			   will be allowed to send.
			   It will occur when:
			   (1-W)*true_avgidle + W*delay = 0, i.e.
			   idle = (1/W - 1)*(-true_avgidle)
			   or
			   idle = (1 - W)*(-cl->avgidle);
			 */
			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);

			/*
			   That is not all.
			   To maintain the rate allocated to the class,
			   we add to undertime the virtual clock time
			   necessary to complete the transmitted packet.
			   (len/phys_bandwidth has already been passed
			   by the moment of cbq_update)
			 */

			idle -= L2T(&q->link, len);
			idle += L2T(cl, len);

			PSCHED_AUDIT_TDIFF(idle);

			PSCHED_TADD2(q->now, idle, cl->undertime);
		} else {
			/* Underlimit */

			PSCHED_SET_PASTPERFECT(cl->undertime);
			if (avgidle > cl->maxidle)
				cl->avgidle = cl->maxidle;
			else
				cl->avgidle = avgidle;
		}
		cl->last = q->now;
	}

	cbq_update_toplevel(q, this, q->tx_borrowed);
}

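/*	Illustration (user-space): the scaled EWMA above keeps
	cl->avgidle == true_avgidle / W with W = 2^-ewma_log, so the update
	reduces to one add and one shift. Stand-alone version with invented
	idle samples:

		#include <stdio.h>

		int main(void)
		{
			int  ewma_log = 5;              // W = 1/32
			long avgidle = 0;               // scaled: true_avgidle / W
			long samples[] = { 200, -50, -50, 300, -100 };
			int i;

			for (i = 0; i < 5; i++) {
				long idle = samples[i];
				// true_avgidle = (1-W)*true_avgidle + W*idle,
				// divided through by W:
				avgidle += idle - (avgidle >> ewma_log);
				printf("idle=%5ld  scaled=%6ld  true=%8.2f\n",
				       idle, avgidle, avgidle / 32.0);
			}
			return 0;
		}
 */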
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)
		return cl;

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {
		cl->delayed = 0;
		return cl;
	}

	do {
		/* This is a very suspicious place. Currently the overlimit
		   action is generated for unbounded classes
		   only if the link is completely congested.
		   Though it agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular,
		   it means that this chunk of code will either
		   never be called or result in strong amplification
		   of burstiness. Dangerous, silly, and yet
		   no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->stats.overlimits++;
			this_cl->overlimit(this_cl);
			return NULL;
		}
		if (cl->level > q->toplevel)
			return NULL;
	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));

	cl->delayed = 0;
	return cl;
}

static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;
	struct sk_buff *skb;
	int deficit;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

	do {
		deficit = 0;

		/* Start round */
		do {
			struct cbq_class *borrow = cl;

			if (cl->q->q.qlen &&
			    (borrow = cbq_under_limit(cl)) == NULL)
				goto skip_class;

			if (cl->deficit <= 0) {
				/* Class exhausted its allotment for
				   this round. Switch to the next one.
				 */
				deficit = 1;
				cl->deficit += cl->quantum;
				goto next_class;
			}

			skb = cl->q->dequeue(cl->q);

			/* Class did not give us any skb :-(
			   This can occur even if cl->q->q.qlen != 0,
			   e.g. if cl->q == "tbf"
			 */
			if (skb == NULL)
				goto skip_class;

			cl->deficit -= skb->len;
			q->tx_class = cl;
			q->tx_borrowed = borrow;
			if (borrow != cl) {
#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			}
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl = cl->next_alive;
				cl->deficit += cl->quantum;
			}
			return skb;

skip_class:
			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
				/* Class is empty or penalized.
				   Unlink it from the active chain.
				 */
				cl_prev->next_alive = cl->next_alive;
				cl->next_alive = NULL;

				/* Did cl_tail point to it? */
				if (cl == cl_tail) {
					/* Repair it! */
					cl_tail = cl_prev;

					/* Was it the last class in this band? */
					if (cl == cl_tail) {
						/* Kill the band! */
						q->active[prio] = NULL;
						q->activemask &= ~(1<<prio);
						if (cl->q->q.qlen)
							cbq_activate_class(cl);
						return NULL;
					}

					q->active[prio] = cl_tail;
				}
				if (cl->q->q.qlen)
					cbq_activate_class(cl);

				cl = cl_prev;
			}

next_class:
			cl_prev = cl;
			cl = cl->next_alive;
		} while (cl_prev != cl_tail);
	} while (deficit);

	q->active[prio] = cl_prev;

	return NULL;
}

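/*	Illustration (user-space): the deficit bookkeeping above is ordinary
	deficit round robin: a class may send while its deficit is positive,
	and an exhausted class gets its quantum back and yields the band for
	this round. Stand-alone model with two queues and invented packet
	sizes:

		#include <stdio.h>

		struct q { const char *name; long deficit, quantum; int len[4], head; };

		int main(void)
		{
			struct q qs[2] = {
				{ "A", 0, 1000, { 900, 900, 900, 0 }, 0 },
				{ "B", 0, 500,  { 300, 300, 300, 0 }, 0 },
			};
			int i, progress = 1;

			while (progress) {
				progress = 0;
				for (i = 0; i < 2; i++) {
					struct q *c = &qs[i];

					if (!c->len[c->head])
						continue;       // queue empty
					if (c->deficit <= 0) {  // allotment used up
						c->deficit += c->quantum;
						progress = 1;   // retry next round
						continue;
					}
					c->deficit -= c->len[c->head];
					printf("%s sends %d (deficit now %ld)\n",
					       c->name, c->len[c->head++], c->deficit);
					progress = 1;
				}
			}
			return 0;
		}
 */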
static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
		if (skb)
			return skb;
	}
	return NULL;
}

static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_time_t now;
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

	if (q->tx_class) {
		psched_tdiff_t incr2;
		/* Time integrator. We calculate EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that:

		   cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		cbq_update(q);
		if ((incr -= incr2) < 0)
			incr = 0;
	}
	PSCHED_TADD(q->now, incr);
	q->now_rt = now;

	for (;;) {
		q->wd_expires = 0;

		skb = cbq_dequeue_1(sch);
		if (skb) {
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		/* All the classes are overlimit.

		   This is possible if:
		   1. The scheduler is empty.
		   2. Toplevel cutoff inhibited borrowing.
		   3. The root class is overlimit.

		   Reset the 2nd and 3rd conditions and retry.

		   Note that NS and cbq-2.0 are buggy: peeking
		   an arbitrary class is appropriate for ancestor-only
		   sharing, but not for the toplevel algorithm.

		   Our version is better but slower, because it requires
		   two passes; that is unavoidable with top-level sharing.
		*/

		if (q->toplevel == TC_CBQ_MAXLEVEL &&
		    PSCHED_IS_PASTPERFECT(q->link.undertime))
			break;

		q->toplevel = TC_CBQ_MAXLEVEL;
		PSCHED_SET_PASTPERFECT(q->link.undertime);
	}

	/* No packets in scheduler or nobody wants to give them to us :-(
	   Sigh... start watchdog timer in the last case. */

	if (sch->q.qlen) {
		sch->stats.overlimits++;
		if (q->wd_expires) {
			long delay = PSCHED_US2JIFFIE(q->wd_expires);
			if (delay <= 0)
				delay = 1;
			mod_timer(&q->wd_timer, jiffies + delay);
			sch->flags |= TCQ_F_THROTTLED;
		}
	}
	return NULL;
}

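/*	Illustration (user-space): per dequeue, the integrator above advances
	the virtual clock by whichever is larger: the expected transmission
	time of the previous packet, or the real time that elapsed since the
	last dequeue. Toy model with invented numbers:

		#include <stdio.h>

		int main(void)
		{
			long now_rt = 0, vclock = 0;            // usec
			long real_gap[] = { 1000, 100, 5000 };  // gaps between dequeues
			long tx_time[]  = { 1200, 1200, 1200 }; // expected tx times
			int i;

			for (i = 0; i < 3; i++) {
				long incr = real_gap[i];        // real time elapsed

				now_rt += real_gap[i];
				vclock += tx_time[i];           // expected EOS time
				incr -= tx_time[i];             // real time beyond that
				if (incr < 0)
					incr = 0;
				vclock += incr;                 // warp up to real time
				printf("real=%6ld  virtual=%6ld\n", now_rt, vclock);
			}
			return 0;
		}
 */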
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
{
	if (this == NULL)
		return;

	do {
		int level = 0;
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
			do {
				if (cl->level > level)
					level = cl->level;
			} while ((cl = cl->sibling) != this->children);
		}
		this->level = level+1;
	} while ((this = this->tparent) != NULL);
}

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
	struct cbq_class *cl;
	unsigned h;

	if (q->quanta[prio] == 0)
		return;

	for (h=0; h<16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflow!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
					q->quanta[prio];
			}
			if (cl->quantum <= 0 || cl->quantum > 32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
			}
		}
	}
}

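/*	Illustration (user-space): the normalization above computes
	quantum = weight * allot * nclasses[prio] / quanta[prio], where
	quanta[prio] is the sum of the weights in the band, so quanta keep the
	ratio of the weights while averaging to the allot. With 32-bit
	arithmetic the product can overflow (the BUGGGG note above); the toy
	below uses a 64-bit intermediate. All numbers are invented.

		#include <stdio.h>

		int main(void)
		{
			long allot[]  = { 1514, 1514, 1514 };
			long weight[] = { 100000, 300000, 600000 };   // e.g. rates
			long quanta = 0, nclasses = 3;
			int i;

			for (i = 0; i < nclasses; i++)
				quanta += weight[i];
			for (i = 0; i < nclasses; i++) {
				long q = (long)((long long)weight[i] * allot[i] *
						nclasses / quanta);
				printf("class %d: quantum %ld\n", i, q);
			}
			return 0;
		}
 */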
static void cbq_sync_defmap(struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;
	unsigned h;
	int i;

	if (split == NULL)
		return;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;
	}

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])
			continue;

		for (h=0; h<16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
				    c->defmap&(1<<i)) {
					split->defaults[i] = c;
					level = c->level;
				}
			}
		}
	}
}

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
{
	struct cbq_class *split = NULL;

	if (splitid == 0) {
		if ((split = cl->split) == NULL)
			return;
		splitid = split->classid;
	}

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)
				break;
	}

	if (split == NULL)
		return;

	if (cl->split != split) {
		cl->defmap = 0;
		cbq_sync_defmap(cl);
		cl->split = split;
		cl->defmap = def&mask;
	} else
		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
}

static void cbq_unlink_class(struct cbq_class *this)
{
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
		if (cl == this) {
			*clp = cl->next;
			cl->next = NULL;
			break;
		}
	}

	if (this->tparent) {
		clp=&this->sibling;
		cl = *clp;
		do {
			if (cl == this) {
				*clp = cl->sibling;
				break;
			}
			clp = &cl->sibling;
		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;
		}
	} else {
		BUG_TRAP(this->sibling == this);
	}
}

static void cbq_link_class(struct cbq_class *this)
{
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent == NULL)
		return;

	if (parent->children == NULL) {
		parent->children = this;
	} else {
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
	}
}

static unsigned int cbq_drop(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;
	int prio;
	unsigned int len;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)
			continue;

		cl = cl_head;
		do {
			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
				sch->q.qlen--;
				return len;
			}
		} while ((cl = cl->next_alive) != cl_head);
	}
	return 0;
}

static void
cbq_reset(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	int prio;
	unsigned h;

	q->activemask = 0;
	q->pmask = 0;
	q->tx_class = NULL;
	q->tx_borrowed = NULL;
	del_timer(&q->wd_timer);
	del_timer(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			qdisc_reset(cl->q);

			cl->next_alive = NULL;
			PSCHED_SET_PASTPERFECT(cl->undertime);
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
		}
	}
	sch->q.qlen = 0;
}

static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
{
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	}
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	}
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;
	return 0;
}

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
{
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);
}

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
{
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	if (wrr->allot)
		cl->allot = wrr->allot;
	if (wrr->weight)
		cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;
	}

	cbq_addprio(q, cl);
	return 0;
}

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
{
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
		break;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
		break;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
			return -EINVAL;
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
		break;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
		break;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
		break;
	default:
		return -EINVAL;
	}
	cl->penalty = (ovl->penalty*HZ)/1000;
	return 0;
}

#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;
	}
	return 0;
}
#endif

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
{
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
	return 0;
}

static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
		return -EINVAL;

	q->link.refcnt = 1;
	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;

	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;
	q->link.stats_lock = &sch->dev->queue_lock;

	init_timer(&q->wd_timer);
	q->wd_timer.data = (unsigned long)sch;
	q->wd_timer.function = cbq_watchdog;
	init_timer(&q->delay_timer);
	q->delay_timer.data = (unsigned long)sch;
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);
	q->now_rt = q->now;

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
	return 0;
}

static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_lssopt opt;

	opt.flags = 0;
	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;
	opt.change = ~0;
	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_wrropt opt;

	opt.flags = 0;
	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.penalty = (cl->penalty*1000)/HZ;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;
		opt.defchange = ~0;
		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
	}
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *b = skb->tail;
	struct tc_cbq_police opt;

	opt.police = cl->police;
	RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
#endif

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
		return -1;
	return 0;
}

int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
{
	RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
	return 0;

rtattr_failure:
	return -1;
}

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	spin_lock_bh(&sch->dev->queue_lock);
	q->link.xstats.avgidle = q->link.avgidle;
	if (cbq_copy_xstats(skb, &q->link.xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	}
	spin_unlock_bh(&sch->dev->queue_lock);
	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int
cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned char *b = skb->tail;
	struct rtattr *rta;

	if (cl->tparent)
		tcm->tcm_parent = cl->tparent->classid;
	else
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	cl->stats.qlen = cl->q->q.qlen;
	if (qdisc_copy_stats(skb, &cl->stats, cl->stats_lock))
		goto rtattr_failure;
	spin_lock_bh(&sch->dev->queue_lock);
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;
	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
	if (cbq_copy_xstats(skb, &cl->xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	}
	spin_unlock_bh(&sch->dev->queue_lock);

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (cl) {
		if (new == NULL) {
			if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
				return -ENOBUFS;
		} else {
#ifdef CONFIG_NET_CLS_POLICE
			if (cl->police == TC_POLICE_RECLASSIFY)
				new->reshape_fail = cbq_reshape_fail;
#endif
		}
		sch_tree_lock(sch);
		*old = cl->q;
		cl->q = new;
		sch->q.qlen -= (*old)->q.qlen;
		qdisc_reset(*old);
		sch_tree_unlock(sch);

		return 0;
	}
	return -ENOENT;
}

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl ? cl->q : NULL;
}

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		cl->refcnt++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_destroy_filters(struct cbq_class *cl)
{
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		cl->filter_list = tp->next;
		tcf_destroy(tp);
	}
}

static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
	struct cbq_sched_data *q = qdisc_priv(sch);

	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	qdisc_kill_estimator(&cl->stats);
#endif
	if (cl != &q->link)
		kfree(cl);
}

static void
cbq_destroy(struct Qdisc* sch)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;
	unsigned h;

#ifdef CONFIG_NET_CLS_POLICE
	q->rx_class = NULL;
#endif
	for (h = 0; h < 16; h++) {
		struct cbq_class *next;

		for (cl = q->classes[h]; cl; cl = next) {
			next = cl->next;
			cbq_destroy_class(sch, cl);
		}
	}
}

static void cbq_put(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
			q->rx_class = NULL;
		spin_unlock_bh(&sch->dev->queue_lock);
#endif
		cbq_destroy_class(sch, cl);
	}
}

static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
		 unsigned long *arg)
{
	int err;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	if (opt == NULL ||
	    rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
		return -EINVAL;

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
		return -EINVAL;

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
		return -EINVAL;

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
		return -EINVAL;

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
		return -EINVAL;

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
		return -EINVAL;
#endif

	if (cl) {
		/* Check parent */
		if (parentid) {
			if (cl->tparent && cl->tparent->classid != parentid)
				return -EINVAL;
			if (!cl->tparent && parentid != TC_H_ROOT)
				return -EINVAL;
		}

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
			if (rtab == NULL)
				return -EINVAL;
		}

		/* Change class parameters */
		sch_tree_lock(sch);

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

		if (rtab) {
			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);
		}

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_rmprio(q, cl);
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
		}

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

		if (cl->q->q.qlen)
			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1]) {
			qdisc_kill_estimator(&cl->stats);
			qdisc_new_estimator(&cl->stats, cl->stats_lock,
					    tca[TCA_RATE-1]);
		}
#endif
		return 0;
	}

	if (parentid == TC_H_ROOT)
		return -EINVAL;

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)
		return -EINVAL;

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
	if (rtab == NULL)
		return -EINVAL;

	if (classid) {
		err = -EINVAL;
		if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
			goto failure;
	} else {
		int i;
		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
				q->hgenerator = 1;
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
				break;
		}
		err = -ENOSR;
		if (i >= 0x8000)
			goto failure;
		classid = classid|q->hgenerator;
	}

	parent = &q->link;
	if (parentid) {
		parent = cbq_class_lookup(q, parentid);
		err = -EINVAL;
		if (parent == NULL)
			goto failure;
	}

	err = -ENOBUFS;
	cl = kmalloc(sizeof(*cl), GFP_KERNEL);
	if (cl == NULL)
		goto failure;
	memset(cl, 0, sizeof(*cl));
	cl->R_tab = rtab;
	rtab = NULL;
	cl->refcnt = 1;
	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;
	cl->qdisc = sch;
	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats_lock = &sch->dev->queue_lock;

	sch_tree_lock(sch);
	cbq_link_class(cl);
	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log==0)
		cl->ewma_log = q->link.ewma_log;
	if (cl->maxidle==0)
		cl->maxidle = q->link.maxidle;
	if (cl->avpkt==0)
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
#endif
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		qdisc_new_estimator(&cl->stats, cl->stats_lock,
				    tca[TCA_RATE-1]);
#endif

	*arg = (unsigned long)cl;
	return 0;

failure:
	qdisc_put_rtab(rtab);
	return err;
}

static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (cl->filters || cl->children || cl == &q->link)
		return -EBUSY;

	sch_tree_lock(sch);

	if (cl->next_alive)
		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_class = NULL;
		q->tx_borrowed = NULL;
	}
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)
		q->rx_class = NULL;
#endif

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);
	cl->defmap = 0;
	cbq_sync_defmap(cl);

	cbq_rmprio(q, cl);
	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(sch, cl);

	return 0;
}

static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	if (cl == NULL)
		cl = &q->link;

	return &cl->filter_list;
}

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

	if (cl) {
		if (p && p->level <= cl->level)
			return 0;
		cl->filters++;
		return (unsigned long)cl;
	}
	return 0;
}

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct cbq_class *cl = (struct cbq_class*)arg;

	cl->filters--;
}

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned h;

	if (arg->stop)
		return;

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static struct Qdisc_class_ops cbq_class_ops = {
	.graft		=	cbq_graft,
	.leaf		=	cbq_leaf,
	.get		=	cbq_get,
	.put		=	cbq_put,
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.walk		=	cbq_walk,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,
};

static struct Qdisc_ops cbq_qdisc_ops = {
	.next		=	NULL,
	.cl_ops		=	&cbq_class_ops,
	.id		=	"cbq",
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.drop		=	cbq_drop,
	.init		=	cbq_init,
	.reset		=	cbq_reset,
	.destroy	=	cbq_destroy,
	.dump		=	cbq_dump,
	.owner		=	THIS_MODULE,
};

static int __init cbq_module_init(void)
{
	return register_qdisc(&cbq_qdisc_ops);
}

static void __exit cbq_module_exit(void)
{
	unregister_qdisc(&cbq_qdisc_ops);
}

module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");