/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com>    :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/kmod.h>

#include <net/pkt_sched.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>

#ifdef CONFIG_RTNETLINK
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
#endif

/*

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that can
   enqueue packets and dequeue them (when the device is ready to send
   something) in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs fall into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   handle-based information supplied by the user into a form more
   intelligible to the kernel, to perform sanity checks and the parts
   of the work common to all qdiscs, and to provide rtnetlink
   notifications.

   All the real intelligence lives inside the qdisc modules.

   Every discipline has two major routines: enqueue and dequeue
   (a minimal sketch of both follows this comment).

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return
   NULL, but that does not mean the queue is empty, only that the
   discipline does not want to send anything right now. The queue is
   really empty only if q->q.qlen == 0. For complicated disciplines
   with multiple queues, q->q is not the real packet queue, but
   q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by the policer.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues a packet that was already dequeued once. It is used for
   non-standard or just buggy devices that can defer output even when
   dev->tbusy == 0.

   ---reset

   returns the qdisc to its initial state: purges all buffers and
   clears all timers and counters (except statistics).

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime
   of the qdisc.

   ---change

   changes qdisc parameters.
 */

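/*
 * Illustrative sketch, not part of the original file: a trivial
 * tail-drop FIFO following the conventions above, assuming the 2.4-era
 * qdisc interfaces where sch->q is a struct sk_buff_head. The names
 * example_* and the limit of 128 packets are invented for illustration.
 */
#if 0	/* example only, never compiled */
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	if (sch->q.qlen >= 128) {
		/* Tail drop: tell the caller not to back off (NET_XMIT_DROP). */
		kfree_skb(skb);
		sch->stats.drops++;
		return NET_XMIT_DROP;
	}
	__skb_queue_tail(&sch->q, skb);
	sch->stats.packets++;
	sch->stats.bytes += skb->len;
	return 0;
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	/* May return NULL; callers must test q.qlen to see if it is empty. */
	return __skb_dequeue(&sch->q);
}
#endif
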
/* Protects the list of registered TC modules. It is a pure SMP lock. */
static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;

/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base = NULL;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) {
		if (strcmp(qops->id, q->id) == 0) {
			write_unlock(&qdisc_mod_lock);
			return -EEXIST;
		}
	}

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->requeue == NULL)
		qops->requeue = noop_qdisc_ops.requeue;
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	write_unlock(&qdisc_mod_lock);
	return 0;
}

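/*
 * Illustrative sketch, not part of the original file: how a scheduler
 * module typically pairs these calls, assuming a hypothetical
 * example_qdisc_ops defined elsewhere in that module.
 */
#if 0	/* example only, never compiled */
static int __init example_module_init(void)
{
	return register_qdisc(&example_qdisc_ops);
}

static void __exit example_module_exit(void)
{
	unregister_qdisc(&example_qdisc_ops);
}
#endif
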
int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}

/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	for (q = dev->qdisc_list; q; q = q->next) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

/* Find the child qdisc bound to class "classid" of qdisc "p" */

struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);
	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}

/* Find a queueing discipline by name */

struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (rtattr_strcmp(kind, q->id) == 0)
				break;
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
{
	struct qdisc_rate_table *rtab;

	/* Reuse an existing table with an identical rate spec, if any. */
	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, RTA_DATA(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}

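/*
 * Added commentary, not in the original: rate tables are shared. A
 * shaping discipline calls qdisc_get_rtab() from its init/change path
 * (e.g. sch_tbf.c passes the TCA_TBF_RTAB attribute) and balances it
 * with qdisc_put_rtab() in destroy; an entry is freed only when the
 * last reference is dropped.
 */
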
/* Allocate a unique handle from the space managed by the kernel */

u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}

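/*
 * Added commentary, not in the original: a handle packs a 16-bit major
 * and a 16-bit minor number into a u32. TC_H_MAKE() combines an
 * already-shifted major with a minor, and TC_H_MAJ()/TC_H_MIN() mask
 * them back out, e.g.:
 *
 *	u32 h = TC_H_MAKE(0x80010000U, 0);	// qdisc "8001:"
 *	TC_H_MAJ(h) == 0x80010000U, TC_H_MIN(h) == 0
 *
 * so the loop above hands out 8001:, 8002:, ... skipping any handle
 * already present on the device.
 */
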
/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&dev->queue_lock);
	oqdisc = dev->qdisc_sleeping;

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev->qdisc_sleeping = qdisc;
	dev->qdisc = &noop_qdisc;
	spin_unlock_bh(&dev->queue_lock);
	write_unlock(&qdisc_tree_lock);

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return oqdisc;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent", or to the
   device itself when parent is NULL.

   The old qdisc is not destroyed but returned in *old.
 */

int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
		struct Qdisc *new, struct Qdisc **old)
{
	int err = 0;

	if (parent == NULL) {
		*old = dev_graft_qdisc(dev, new);
	} else {
		struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;
		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, old);
				cops->put(parent, cl);
			}
		}
	}
	return err;
}

#ifdef CONFIG_RTNETLINK

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
{
	int err;
	struct rtattr *kind = tca[TCA_KIND-1];
	struct Qdisc *sch = NULL;
	struct Qdisc_ops *ops;
	int size;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
	if (ops == NULL && tca[TCA_KIND-1] != NULL) {
		char module_name[4 + IFNAMSIZ + 1];

		if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
			sprintf(module_name, "sch_%s", (char*)RTA_DATA(kind));
			request_module(module_name);
			ops = qdisc_lookup_ops(kind);
		}
	}
#endif

	err = -EINVAL;
	if (ops == NULL)
		goto err_out;

	size = sizeof(*sch) + ops->priv_size;

	sch = kmalloc(size, GFP_KERNEL);
	err = -ENOBUFS;
	if (sch == NULL)
		goto err_out;

	/* Grrr... Resolve race condition with module unload */

	err = -EINVAL;
	if (ops != qdisc_lookup_ops(kind))
		goto err_out;

	memset(sch, 0, size);

	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev = dev;
	atomic_set(&sch->refcnt, 1);
	sch->stats.lock = &dev->queue_lock;
	if (handle == 0) {
		handle = qdisc_alloc_handle(dev);
		err = -ENOMEM;
		if (handle == 0)
			goto err_out;
	}
	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
		write_lock(&qdisc_tree_lock);
		sch->next = dev->qdisc_list;
		dev->qdisc_list = sch;
		write_unlock(&qdisc_tree_lock);
#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
#endif
		return sch;
	}

err_out:
	*errp = err;
	if (sch)
		kfree(sch);
	return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
{
	if (tca[TCA_OPTIONS-1]) {
		int err;

		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
		if (err)
			return err;
	}
#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1]) {
		qdisc_kill_estimator(&sch->stats);
		qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
	}
#endif
	return 0;
}

/*
   Detect a potential loop before grafting qdisc "q" under qdisc "p":
   walk q's class tree and fail if p shows up below it.
 */

struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

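/*
 * Added commentary, not in the original: this rejects grafts such as
 * moving a qdisc underneath one of its own descendant classes. The walk
 * recurses through leaf qdiscs and yields -ELOOP if the graft parent p
 * is reachable from q, or if the hierarchy exceeds 7 levels.
 */
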
/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
				return -ENOENT;
			q = qdisc_leaf(p, clid);
		} else
			q = dev->qdisc_sleeping;

		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
			return err;
		if (q) {
			qdisc_notify(skb, n, clid, q, NULL);
			spin_lock_bh(&dev->queue_lock);
			qdisc_destroy(q);
			spin_unlock_bh(&dev->queue_lock);
		}
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}

/*
   Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
				return -ENOENT;
			q = qdisc_leaf(p, clid);
		} else {
			q = dev->qdisc_sleeping;
		}

		/* It may be the default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and we have a
				 *   choice: either to change it or to
				 *   create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if both CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requestor wanted
				 *   to say that the qdisc tcm_handle is not
				 *   expected to exist, so we choose
				 *   create/graft too.
				 *
				 *   3. The last case is when no flags are
				 *   set. Alas, it is sort of a hole in the
				 *   API: we cannot decide what to do
				 *   unambiguously. For now we choose
				 *   create/graft if the user gave a KIND
				 *   that does not match the existing one
				 *   (see the flag-mapping note after this
				 *   function).
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND-1] &&
				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
	if (q == NULL)
		return err;

graft:
	{
		struct Qdisc *old_q = NULL;

		err = qdisc_graft(dev, p, clid, q, &old_q);
		if (err) {
			if (q) {
				spin_lock_bh(&dev->queue_lock);
				qdisc_destroy(q);
				spin_unlock_bh(&dev->queue_lock);
			}
			return err;
		}
		qdisc_notify(skb, n, clid, old_q, q);
		if (old_q) {
			spin_lock_bh(&dev->queue_lock);
			qdisc_destroy(old_q);
			spin_unlock_bh(&dev->queue_lock);
		}
	}
	return 0;
}

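/*
 * Added commentary, not in the original: how a userspace tool such as
 * tc is expected to combine the netlink flags tested above:
 *
 *	add	-> NLM_F_CREATE | NLM_F_EXCL	(fail if it already exists)
 *	replace	-> NLM_F_CREATE | NLM_F_REPLACE	(create, or swap in place)
 *	change	-> (neither)			(modify an existing qdisc only)
 */
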
int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st)
{
	spin_lock_bh(st->lock);
	RTA_PUT(skb, TCA_STATS, (char*)&st->lock - (char*)st, st);
	spin_unlock_bh(st->lock);
	return 0;

rtattr_failure:
	spin_unlock_bh(st->lock);
	return -1;
}

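/*
 * Added commentary, not in the original: the length passed to RTA_PUT
 * above, (char*)&st->lock - (char*)st, works because the spinlock
 * pointer is the last member of struct tc_stats; it equals the size of
 * the counters that precede it, so the lock pointer itself is never
 * copied to userspace. RTA_PUT jumps to the rtattr_failure label when
 * the skb runs out of room.
 */
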
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, unsigned flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto rtattr_failure;
	q->stats.qlen = q->q.qlen;
	if (qdisc_copy_stats(skb, &q->stats))
		goto rtattr_failure;
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	struct Qdisc *q;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
		if (idx < s_idx)
			continue;
		if (idx > s_idx)
			s_q_idx = 0;
		read_lock(&qdisc_tree_lock);
		for (q = dev->qdisc_list, q_idx = 0; q;
		     q = q->next, q_idx++) {
			if (q_idx < s_q_idx)
				continue;
			if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
				read_unlock(&qdisc_tree_lock);
				goto done;
			}
		}
		read_unlock(&qdisc_tree_lock);
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}

/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	struct Qdisc *q = NULL;
	struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */
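
	/*
	 * Added commentary, not in the original: for example, the request
	 * "tc class add dev eth0 parent 1: classid 1:10 ..." arrives here
	 * with parent == 1:0 and handle == 1:10, so qid resolves to
	 * 0x00010000 and the class id to 0x00010010 below.
	 */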

	/* Step 1. Determine qdisc handle X:0 */

	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc_sleeping->handle;

		/* Now qid is a genuine qdisc handle consistent with
		   both parent and child.

		   TC_H_MAJ(pid) may still be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);
	return err;
}

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, unsigned flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
		goto rtattr_failure;
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}

struct qdisc_dump_args
{
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];

	read_lock(&qdisc_tree_lock);
	for (q = dev->qdisc_list, t = 0; q; q = q->next, t++) {
		if (t < s_t) continue;
		if (!q->ops->cl_ops) continue;
		if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		arg.w.fn = qdisc_class_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1];
		arg.w.count = 0;
		q->ops->cl_ops->walk(q, &arg.w);
		cb->args[1] = arg.w.count;
		if (arg.w.stop)
			break;
	}
	read_unlock(&qdisc_tree_lock);

	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

#endif

int psched_us_per_tick = 1;
int psched_tick_per_us = 1;

#ifdef CONFIG_PROC_FS
static int psched_read_proc(char *buffer, char **start, off_t offset,
			    int length, int *eof, void *data)
{
	int len;

	len = sprintf(buffer, "%08x %08x %08x %08x\n",
		      psched_tick_per_us, psched_us_per_tick,
		      1000000, HZ);

	len -= offset;

	if (len > length)
		len = length;
	if (len < 0)
		len = 0;

	*start = buffer + offset;
	*eof = 1;

	return len;
}
#endif

#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
int psched_tod_diff(int delta_sec, int bound)
{
	int delta;

	if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1)
		return bound;
	delta = delta_sec * 1000000;
	if (delta > bound)
		delta = bound;
	return delta;
}
#endif

psched_time_t psched_time_base;

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
psched_tdiff_t psched_clock_per_hz;
int psched_clock_scale;
#endif

#ifdef PSCHED_WATCHER
PSCHED_WATCHER psched_time_mark;

static void psched_tick(unsigned long);

static struct timer_list psched_timer =
	{ NULL, NULL, 0, 0L, psched_tick };

static void psched_tick(unsigned long dummy)
{
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
	psched_time_t dummy_stamp;
	PSCHED_GET_TIME(dummy_stamp);
	/* Sampling once a second stays ahead of 32-bit wraparound for
	   CPUs up to ~4GHz. */
	psched_timer.expires = jiffies + 1*HZ;
#else
	unsigned long now = jiffies;
	psched_time_base = ((u64)now)<<PSCHED_JSCALE;
	psched_time_mark = now;
	psched_timer.expires = now + 60*60*HZ;
#endif
	add_timer(&psched_timer);
}
#endif

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
int __init psched_calibrate_clock(void)
{
	psched_time_t stamp, stamp1;
	struct timeval tv, tv1;
	psched_tdiff_t delay;
	long rdelay;
	unsigned long stop;

#if CPU == 586 || CPU == 686
	if (!(boot_cpu_data.x86_capability & X86_FEATURE_TSC))
		return -1;
#endif

#ifdef PSCHED_WATCHER
	psched_tick(0);
#endif
	stop = jiffies + HZ/10;
	PSCHED_GET_TIME(stamp);
	do_gettimeofday(&tv);
	while (time_before(jiffies, stop)) {
		barrier();
	}
	PSCHED_GET_TIME(stamp1);
	do_gettimeofday(&tv1);

	delay = PSCHED_TDIFF(stamp1, stamp);
	rdelay = tv1.tv_usec - tv.tv_usec;
	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
	if (rdelay > delay)
		return -1;
	delay /= rdelay;
	psched_tick_per_us = delay;
	while ((delay >>= 1) != 0)
		psched_clock_scale++;
	psched_us_per_tick = 1<<psched_clock_scale;
	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
	return 0;
}
#endif

int __init pktsched_init(void)
{
#ifdef CONFIG_RTNETLINK
	struct rtnetlink_link *link_p;
#endif
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *ent;
#endif

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
	if (psched_calibrate_clock() < 0)
		return -1;
#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
	psched_tick_per_us = HZ<<PSCHED_JSCALE;
	psched_us_per_tick = 1000000;
#ifdef PSCHED_WATCHER
	psched_tick(0);
#endif
#endif

#ifdef CONFIG_RTNETLINK
	link_p = rtnetlink_links[PF_UNSPEC];

	/* Set up the rtnetlink links. This is done here to avoid
	   exporting a large number of public symbols.
	 */

	if (link_p) {
		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
	}
#endif

#define INIT_QDISC(name) { \
	extern struct Qdisc_ops name##_qdisc_ops; \
	register_qdisc(&name##_qdisc_ops); \
}

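/*
 * Added commentary, not in the original: INIT_QDISC(cbq) expands to
 *
 *	{ extern struct Qdisc_ops cbq_qdisc_ops;
 *	  register_qdisc(&cbq_qdisc_ops); }
 *
 * Note the ## paste may only join "name" with "_qdisc_ops"; pasting
 * "&" onto a token (as the unpatched macro did) is not valid C
 * preprocessing. Each scheduler configured below registers its ops
 * at boot.
 */
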
#ifdef CONFIG_NET_SCH_CBQ
	INIT_QDISC(cbq);
#endif
#ifdef CONFIG_NET_SCH_CSZ
	INIT_QDISC(csz);
#endif
#ifdef CONFIG_NET_SCH_HPFQ
	INIT_QDISC(hpfq);
#endif
#ifdef CONFIG_NET_SCH_HFSC
	INIT_QDISC(hfsc);
#endif
#ifdef CONFIG_NET_SCH_RED
	INIT_QDISC(red);
#endif
#ifdef CONFIG_NET_SCH_GRED
	INIT_QDISC(gred);
#endif
#ifdef CONFIG_NET_SCH_DSMARK
	INIT_QDISC(dsmark);
#endif
#ifdef CONFIG_NET_SCH_SFQ
	INIT_QDISC(sfq);
#endif
#ifdef CONFIG_NET_SCH_TBF
	INIT_QDISC(tbf);
#endif
#ifdef CONFIG_NET_SCH_TEQL
	teql_init();
#endif
#ifdef CONFIG_NET_SCH_PRIO
	INIT_QDISC(prio);
#endif
#ifdef CONFIG_NET_SCH_ATM
	INIT_QDISC(atm);
#endif
#ifdef CONFIG_NET_CLS
	tc_filter_init();
#endif

#ifdef CONFIG_PROC_FS
	ent = create_proc_entry("net/psched", 0, 0);
	ent->read_proc = psched_read_proc;
#endif

	return 0;
}