/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the
   times determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   the information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to make some sanity
   checks and the part of the work which is common to all qdiscs,
   and to provide rtnetlink notifications.

   All the real intelligent work is done inside qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.
   (A sketch of a minimal qdisc module follows this comment.)

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but it does not mean that the queue is empty, it just means that
   the discipline does not want to send anything this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   a real packet queue, but q->q.qlen must be valid nevertheless.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:

   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by police.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues a once-dequeued packet. It is used for non-standard or
   just buggy devices, which can defer output even if dev->tbusy=0.

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
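/*
 * To make the contract above concrete, here is a minimal sketch of a leaf
 * qdisc module written against this API (compiled out; the "example_*"
 * names are hypothetical and not part of this file). It is essentially a
 * bounded FIFO: note how enqueue reports NET_XMIT_SUCCESS/NET_XMIT_DROP and
 * how sch->q.qlen stays valid because the helpers operate on sch->q.
 */
#if 0
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	/* Accept packets until the device's tx_queue_len is reached. */
	if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) {
		__skb_queue_tail(&sch->q, skb);	/* keeps sch->q.qlen valid */
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}
	/* This very packet is dropped: report NET_XMIT_DROP. */
	sch->qstats.drops++;
	kfree_skb(skb);
	return NET_XMIT_DROP;
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	/* NULL may simply mean "nothing to send right now";
	 * the queue is empty only if sch->q.qlen == 0. */
	return __skb_dequeue(&sch->q);
}

static struct Qdisc_ops example_qdisc_ops = {
	.id		= "example",
	.enqueue	= example_enqueue,
	.dequeue	= example_dequeue,
	.owner		= THIS_MODULE,
};
#endif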
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);
/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/
/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;
/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->requeue == NULL)
		qops->requeue = noop_qdisc_ops.requeue;
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;
}
int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}
/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (rtattr_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, RTA_DATA(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
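/*
 * Typical rate-table usage from a qdisc's init/change path (sketch; the
 * "opt" ratespec and the TCA_EXAMPLE_RTAB attribute name are hypothetical):
 * the table is shared and reference-counted, so each qdisc_get_rtab() must
 * be balanced by a qdisc_put_rtab() on destroy.
 */
#if 0
	struct qdisc_rate_table *rtab;

	rtab = qdisc_get_rtab(&opt->rate, tb[TCA_EXAMPLE_RTAB - 1]);
	if (rtab == NULL)
		return -EINVAL;
	/* rtab->data[] now maps packet sizes to transmission times ... */
	qdisc_put_rtab(rtab);	/* on destroy: drops refcnt, frees last user */
#endif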
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);
	struct net_device *dev = wd->qdisc->dev;

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	smp_wmb();
	netif_schedule(dev);

	return HRTIMER_NORESTART;
}
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
	ktime_t time;

	wd->qdisc->flags |= TCQ_F_THROTTLED;
	time = ktime_set(0, 0);
	time = ktime_add_ns(time, PSCHED_US2NS(expires));
	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
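/*
 * Typical watchdog usage (sketch; the "example" qdisc, its private data
 * layout and next_send_time field are hypothetical): a rate-limiting qdisc
 * arms the watchdog from ->dequeue() when the head packet may not be sent
 * yet; qdisc_watchdog() above then clears TCQ_F_THROTTLED and reschedules
 * the device once the interval has passed.
 */
#if 0
static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	struct example_sched_data *q = qdisc_priv(sch);
	psched_time_t now = psched_get_time();

	if (now < q->next_send_time) {
		/* Too early: sleep until then instead of busy-polling. */
		qdisc_watchdog_schedule(&q->watchdog, q->next_send_time);
		return NULL;
	}
	return __skb_dequeue(&sch->q);
}
#endif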
/* Allocate a unique handle from the space managed by the kernel */

static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}
/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	qdisc_lock_tree(dev);
	if (qdisc && qdisc->flags & TCQ_F_INGRESS) {
		oqdisc = dev->qdisc_ingress;
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
			/* delete */
			qdisc_reset(oqdisc);
			dev->qdisc_ingress = NULL;
		} else {	/* new */
			dev->qdisc_ingress = qdisc;
		}

	} else {

		oqdisc = dev->qdisc_sleeping;

		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
			qdisc_reset(oqdisc);

		/* ... and graft new one */
		if (qdisc == NULL)
			qdisc = &noop_qdisc;
		dev->qdisc_sleeping = qdisc;
		dev->qdisc = &noop_qdisc;
	}

	qdisc_unlock_tree(dev);

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return oqdisc;
}
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
   to device "dev".

   The old qdisc is not destroyed but returned in *old.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       u32 classid,
		       struct Qdisc *new, struct Qdisc **old)
{
	int err = 0;
	struct Qdisc *q = *old;

	if (parent == NULL) {
		if (q && q->flags & TCQ_F_INGRESS) {
			*old = dev_graft_qdisc(dev, q);
		} else {
			*old = dev_graft_qdisc(dev, new);
		}
	} else {
		struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, old);
				if (new)
					new->parent = classid;
				cops->put(parent, cl);
			}
		}
	}
	return err;
}
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
{
	int err;
	struct rtattr *kind = tca[TCA_KIND-1];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		sch->stats_lock = &dev->ingress_lock;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		sch->stats_lock = &dev->queue_lock;
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
		if (tca[TCA_RATE-1]) {
			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						sch->stats_lock,
						tca[TCA_RATE-1]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * a ops->reset() here? The qdisc was never
				 * in action so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}
		qdisc_lock_tree(dev);
		list_add_tail(&sch->list, &dev->qdisc_list);
		qdisc_unlock_tree(dev);

		return sch;
	}
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}
static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
{
	if (tca[TCA_OPTIONS-1]) {
		int err;

		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
		if (err)
			return err;
	}
	if (tca[TCA_RATE-1])
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      sch->stats_lock, tca[TCA_RATE-1]);
	return 0;
}
struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}
static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}
/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
			return err;
		if (q) {
			qdisc_notify(skb, n, clid, q, NULL);
			qdisc_lock_tree(dev);
			qdisc_destroy(q);
			qdisc_unlock_tree(dev);
		}
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm;
	struct rtattr **tca;
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	tca = arg;
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}

		/* It may be the default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   either to change it or to create/graft a
				 *   new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requestor wanted
				 *   to say that qdisc tcm_handle is not
				 *   expected to exist, so we choose
				 *   create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is sort of a hole in the API: we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft if the
				 *   user gave a KIND which does not match the
				 *   existing one.
				 */
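				/* Rule of thumb for orientation (how
				 * iproute2's tc maps onto these netlink
				 * flags; not derived from this file):
				 *   "tc qdisc add"     -> NLM_F_CREATE|NLM_F_EXCL
				 *   "tc qdisc replace" -> NLM_F_CREATE|NLM_F_REPLACE
				 *   "tc qdisc change"  -> no flags
				 */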
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND-1] &&
				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
	else
		q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	if (1) {
		struct Qdisc *old_q = NULL;
		err = qdisc_graft(dev, p, clid, q, &old_q);
		if (err) {
			if (q) {
				qdisc_lock_tree(dev);
				qdisc_destroy(q);
				qdisc_unlock_tree(dev);
			}
			return err;
		}
		qdisc_notify(skb, n, clid, old_q, q);
		if (old_q) {
			qdisc_lock_tree(dev);
			qdisc_destroy(old_q);
			qdisc_unlock_tree(dev);
		}
	}
	return 0;
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto rtattr_failure;
	q->qstats.qlen = q->q.qlen;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, q->stats_lock, &d) < 0)
		goto rtattr_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto rtattr_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto rtattr_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto rtattr_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	struct Qdisc *q;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(dev) {
		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;
		list_for_each_entry(q, &dev->qdisc_list, list) {
			if (q_idx < s_q_idx) {
				q_idx++;
				continue;
			}
			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
				goto done;
			q_idx++;
		}
cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	struct Qdisc *q = NULL;
	struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	   (A worked example follows this comment.)
	 */
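	/*
	   A worked example with illustrative numbers: for qdisc "1:" and
	   class "1:10", clid == TC_H_MAKE(0x10000, 0x10) == 0x10010;
	   TC_H_MAJ(clid) == 0x10000 names the qdisc, and
	   TC_H_MIN(clid) == 0x10 names the class within it.
	 */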
	/* Step 1. Determine qdisc handle X:0 */

	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc_sleeping->handle;

		/* Now qid is the genuine qdisc handle, consistent with
		   both parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto rtattr_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, q->stats_lock, &d) < 0)
		goto rtattr_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto rtattr_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto rtattr_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}
struct qdisc_dump_args
{
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (t < s_t || !q->ops->cl_ops ||
		    (tcm->tcm_parent &&
		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
			t++;
			continue;
		}
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		arg.w.fn = qdisc_class_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1];
		arg.w.count = 0;
		q->ops->cl_ops->walk(q, &arg.w);
		cb->args[1] = arg.w.count;
		if (arg.w.stop)
			break;
		t++;
	}

	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
/* Main classifier routine: scans the classifier chain attached
   to this qdisc, (optionally) tests for protocol, and asks
   specific classifiers.
 */
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
		       struct tcf_result *res)
{
	__be16 protocol = skb->protocol;
	int err = 0;

	for (; tp; tp = tp->next) {
		if ((tp->protocol == protocol ||
		     tp->protocol == htons(ETH_P_ALL)) &&
		    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
#endif
			return err;
		}
	}
	return -1;
}
EXPORT_SYMBOL(tc_classify_compat);
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
	__be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif
	protocol = skb->protocol;

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
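/*
 * How a classful qdisc typically uses this entry point (sketch; the
 * example_* names and the filter_list/default_class layout are
 * hypothetical): run the filter chain, then map the returned classid to a
 * class, falling back to a default when nothing matches.
 */
#if 0
static struct example_class *example_classify(struct sk_buff *skb,
					      struct Qdisc *sch)
{
	struct example_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;

	if (tc_classify(skb, q->filter_list, &res) >= 0)
		return example_find_class(sch, res.classid);
	return q->default_class;	/* no verdict: use the default */
}
#endif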
void tcf_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree(tp);
}

void tcf_destroy_chain(struct tcf_proto *fl)
{
	struct tcf_proto *tp;

	while ((tp = fl) != NULL) {
		fl = tp->next;
		tcf_destroy(tp);
	}
}
EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create("psched", 0, &psched_fops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);
);