2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
34 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
, u32 clid
,
35 struct Qdisc
*old
, struct Qdisc
*new);
36 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
37 struct Qdisc
*q
, unsigned long cl
, int event
);
44 This file consists of two interrelated parts:
46 1. queueing disciplines manager frontend.
47 2. traffic classes manager frontend.
49 Generally, queueing discipline ("qdisc") is a black box,
50 which is able to enqueue packets and to dequeue them (when
51 device is ready to send something) in order and at times
52 determined by algorithm hidden in it.
54 qdisc's are divided to two categories:
55 - "queues", which have no internal structure visible from outside.
56 - "schedulers", which split all the packets to "traffic classes",
57 using "packet classifiers" (look at cls_api.c)
59 In turn, classes may have child qdiscs (as rule, queues)
60 attached to them etc. etc. etc.
62 The goal of the routines in this file is to translate
63 information supplied by user in the form of handles
64 to more intelligible for kernel form, to make some sanity
65 checks and part of work, which is common to all qdiscs
66 and to provide rtnetlink notifications.
68 All real intelligent work is done inside qdisc modules.
72 Every discipline has two major routines: enqueue and dequeue.
76 dequeue usually returns a skb to send. It is allowed to return NULL,
77 but it does not mean that queue is empty, it just means that
78 discipline does not want to send anything this time.
79 Queue is really empty if q->q.qlen == 0.
80 For complicated disciplines with multiple queues q->q is not
81 real packet queue, but however q->q.qlen must be valid.
85 enqueue returns 0, if packet was enqueued successfully.
86 If packet (this one or another one) was dropped, it returns
88 NET_XMIT_DROP - this packet dropped
89 Expected action: do not backoff, but wait until queue will clear.
90 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
91 Expected action: backoff or ignore
92 NET_XMIT_POLICED - dropped by police.
93 Expected action: backoff or error to real-time apps.
99 requeues once dequeued packet. It is used for non-standard or
100 just buggy devices, which can defer output even if dev->tbusy=0.
104 returns qdisc to initial state: purge all buffers, clear all
105 timers, counters (except for statistics) etc.
109 initializes newly created qdisc.
113 destroys resources allocated by init and during lifetime of qdisc.
117 changes qdisc parameters.
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);
124 /************************************************
125 * Queueing disciplines manipulation. *
126 ************************************************/
/* The list of all installed queueing disciplines (singly linked via
 * Qdisc_ops::next, guarded by qdisc_mod_lock). */
static struct Qdisc_ops *qdisc_base;
133 /* Register/unregister queueing discipline */
135 int register_qdisc(struct Qdisc_ops
*qops
)
137 struct Qdisc_ops
*q
, **qp
;
140 write_lock(&qdisc_mod_lock
);
141 for (qp
= &qdisc_base
; (q
= *qp
) != NULL
; qp
= &q
->next
)
142 if (!strcmp(qops
->id
, q
->id
))
145 if (qops
->enqueue
== NULL
)
146 qops
->enqueue
= noop_qdisc_ops
.enqueue
;
147 if (qops
->requeue
== NULL
)
148 qops
->requeue
= noop_qdisc_ops
.requeue
;
149 if (qops
->dequeue
== NULL
)
150 qops
->dequeue
= noop_qdisc_ops
.dequeue
;
156 write_unlock(&qdisc_mod_lock
);
160 int unregister_qdisc(struct Qdisc_ops
*qops
)
162 struct Qdisc_ops
*q
, **qp
;
165 write_lock(&qdisc_mod_lock
);
166 for (qp
= &qdisc_base
; (q
=*qp
)!=NULL
; qp
= &q
->next
)
174 write_unlock(&qdisc_mod_lock
);
178 /* We know handle. Find qdisc among all qdisc's attached to device
179 (root qdisc, all its children, children of children etc.)
182 struct Qdisc
*qdisc_lookup(struct net_device
*dev
, u32 handle
)
186 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
187 if (q
->handle
== handle
)
193 static struct Qdisc
*qdisc_leaf(struct Qdisc
*p
, u32 classid
)
197 struct Qdisc_class_ops
*cops
= p
->ops
->cl_ops
;
201 cl
= cops
->get(p
, classid
);
205 leaf
= cops
->leaf(p
, cl
);
210 /* Find queueing discipline by name */
212 static struct Qdisc_ops
*qdisc_lookup_ops(struct rtattr
*kind
)
214 struct Qdisc_ops
*q
= NULL
;
217 read_lock(&qdisc_mod_lock
);
218 for (q
= qdisc_base
; q
; q
= q
->next
) {
219 if (rtattr_strcmp(kind
, q
->id
) == 0) {
220 if (!try_module_get(q
->owner
))
225 read_unlock(&qdisc_mod_lock
);
230 static struct qdisc_rate_table
*qdisc_rtab_list
;
232 struct qdisc_rate_table
*qdisc_get_rtab(struct tc_ratespec
*r
, struct rtattr
*tab
)
234 struct qdisc_rate_table
*rtab
;
236 for (rtab
= qdisc_rtab_list
; rtab
; rtab
= rtab
->next
) {
237 if (memcmp(&rtab
->rate
, r
, sizeof(struct tc_ratespec
)) == 0) {
243 if (tab
== NULL
|| r
->rate
== 0 || r
->cell_log
== 0 || RTA_PAYLOAD(tab
) != 1024)
246 rtab
= kmalloc(sizeof(*rtab
), GFP_KERNEL
);
250 memcpy(rtab
->data
, RTA_DATA(tab
), 1024);
251 rtab
->next
= qdisc_rtab_list
;
252 qdisc_rtab_list
= rtab
;
257 void qdisc_put_rtab(struct qdisc_rate_table
*tab
)
259 struct qdisc_rate_table
*rtab
, **rtabp
;
261 if (!tab
|| --tab
->refcnt
)
264 for (rtabp
= &qdisc_rtab_list
; (rtab
=*rtabp
) != NULL
; rtabp
= &rtab
->next
) {
273 static enum hrtimer_restart
qdisc_watchdog(struct hrtimer
*timer
)
275 struct qdisc_watchdog
*wd
= container_of(timer
, struct qdisc_watchdog
,
277 struct net_device
*dev
= wd
->qdisc
->dev
;
279 wd
->qdisc
->flags
&= ~TCQ_F_THROTTLED
;
281 if (spin_trylock(&dev
->queue_lock
)) {
283 spin_unlock(&dev
->queue_lock
);
287 return HRTIMER_NORESTART
;
290 void qdisc_watchdog_init(struct qdisc_watchdog
*wd
, struct Qdisc
*qdisc
)
292 hrtimer_init(&wd
->timer
, CLOCK_MONOTONIC
, HRTIMER_MODE_ABS
);
293 wd
->timer
.function
= qdisc_watchdog
;
296 EXPORT_SYMBOL(qdisc_watchdog_init
);
298 void qdisc_watchdog_schedule(struct qdisc_watchdog
*wd
, psched_time_t expires
)
302 wd
->qdisc
->flags
|= TCQ_F_THROTTLED
;
303 time
= ktime_set(0, 0);
304 time
= ktime_add_ns(time
, PSCHED_US2NS(expires
));
305 hrtimer_start(&wd
->timer
, time
, HRTIMER_MODE_ABS
);
307 EXPORT_SYMBOL(qdisc_watchdog_schedule
);
309 void qdisc_watchdog_cancel(struct qdisc_watchdog
*wd
)
311 hrtimer_cancel(&wd
->timer
);
312 wd
->qdisc
->flags
&= ~TCQ_F_THROTTLED
;
314 EXPORT_SYMBOL(qdisc_watchdog_cancel
);
316 /* Allocate an unique handle from space managed by kernel */
318 static u32
qdisc_alloc_handle(struct net_device
*dev
)
321 static u32 autohandle
= TC_H_MAKE(0x80000000U
, 0);
324 autohandle
+= TC_H_MAKE(0x10000U
, 0);
325 if (autohandle
== TC_H_MAKE(TC_H_ROOT
, 0))
326 autohandle
= TC_H_MAKE(0x80000000U
, 0);
327 } while (qdisc_lookup(dev
, autohandle
) && --i
> 0);
329 return i
>0 ? autohandle
: 0;
332 /* Attach toplevel qdisc to device dev */
334 static struct Qdisc
*
335 dev_graft_qdisc(struct net_device
*dev
, struct Qdisc
*qdisc
)
337 struct Qdisc
*oqdisc
;
339 if (dev
->flags
& IFF_UP
)
342 qdisc_lock_tree(dev
);
343 if (qdisc
&& qdisc
->flags
&TCQ_F_INGRESS
) {
344 oqdisc
= dev
->qdisc_ingress
;
345 /* Prune old scheduler */
346 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1) {
349 dev
->qdisc_ingress
= NULL
;
351 dev
->qdisc_ingress
= qdisc
;
356 oqdisc
= dev
->qdisc_sleeping
;
358 /* Prune old scheduler */
359 if (oqdisc
&& atomic_read(&oqdisc
->refcnt
) <= 1)
362 /* ... and graft new one */
365 dev
->qdisc_sleeping
= qdisc
;
366 dev
->qdisc
= &noop_qdisc
;
369 qdisc_unlock_tree(dev
);
371 if (dev
->flags
& IFF_UP
)
377 void qdisc_tree_decrease_qlen(struct Qdisc
*sch
, unsigned int n
)
379 struct Qdisc_class_ops
*cops
;
385 while ((parentid
= sch
->parent
)) {
386 sch
= qdisc_lookup(sch
->dev
, TC_H_MAJ(parentid
));
387 cops
= sch
->ops
->cl_ops
;
388 if (cops
->qlen_notify
) {
389 cl
= cops
->get(sch
, parentid
);
390 cops
->qlen_notify(sch
, cl
);
396 EXPORT_SYMBOL(qdisc_tree_decrease_qlen
);
398 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
401 Old qdisc is not destroyed but returned in *old.
404 static int qdisc_graft(struct net_device
*dev
, struct Qdisc
*parent
,
406 struct Qdisc
*new, struct Qdisc
**old
)
409 struct Qdisc
*q
= *old
;
412 if (parent
== NULL
) {
413 if (q
&& q
->flags
&TCQ_F_INGRESS
) {
414 *old
= dev_graft_qdisc(dev
, q
);
416 *old
= dev_graft_qdisc(dev
, new);
419 struct Qdisc_class_ops
*cops
= parent
->ops
->cl_ops
;
424 unsigned long cl
= cops
->get(parent
, classid
);
426 err
= cops
->graft(parent
, cl
, new, old
);
428 new->parent
= classid
;
429 cops
->put(parent
, cl
);
437 Allocate and initialize new qdisc.
439 Parameters are passed via opt.
442 static struct Qdisc
*
443 qdisc_create(struct net_device
*dev
, u32 handle
, struct rtattr
**tca
, int *errp
)
446 struct rtattr
*kind
= tca
[TCA_KIND
-1];
448 struct Qdisc_ops
*ops
;
450 ops
= qdisc_lookup_ops(kind
);
452 if (ops
== NULL
&& kind
!= NULL
) {
454 if (rtattr_strlcpy(name
, kind
, IFNAMSIZ
) < IFNAMSIZ
) {
455 /* We dropped the RTNL semaphore in order to
456 * perform the module load. So, even if we
457 * succeeded in loading the module we have to
458 * tell the caller to replay the request. We
459 * indicate this using -EAGAIN.
460 * We replay the request because the device may
461 * go away in the mean time.
464 request_module("sch_%s", name
);
466 ops
= qdisc_lookup_ops(kind
);
468 /* We will try again qdisc_lookup_ops,
469 * so don't keep a reference.
471 module_put(ops
->owner
);
483 sch
= qdisc_alloc(dev
, ops
);
489 if (handle
== TC_H_INGRESS
) {
490 sch
->flags
|= TCQ_F_INGRESS
;
491 sch
->stats_lock
= &dev
->ingress_lock
;
492 handle
= TC_H_MAKE(TC_H_INGRESS
, 0);
494 sch
->stats_lock
= &dev
->queue_lock
;
496 handle
= qdisc_alloc_handle(dev
);
503 sch
->handle
= handle
;
505 if (!ops
->init
|| (err
= ops
->init(sch
, tca
[TCA_OPTIONS
-1])) == 0) {
506 if (tca
[TCA_RATE
-1]) {
507 err
= gen_new_estimator(&sch
->bstats
, &sch
->rate_est
,
512 * Any broken qdiscs that would require
513 * a ops->reset() here? The qdisc was never
514 * in action so it shouldn't be necessary.
521 qdisc_lock_tree(dev
);
522 list_add_tail(&sch
->list
, &dev
->qdisc_list
);
523 qdisc_unlock_tree(dev
);
529 kfree((char *) sch
- sch
->padded
);
531 module_put(ops
->owner
);
537 static int qdisc_change(struct Qdisc
*sch
, struct rtattr
**tca
)
539 if (tca
[TCA_OPTIONS
-1]) {
542 if (sch
->ops
->change
== NULL
)
544 err
= sch
->ops
->change(sch
, tca
[TCA_OPTIONS
-1]);
549 gen_replace_estimator(&sch
->bstats
, &sch
->rate_est
,
550 sch
->stats_lock
, tca
[TCA_RATE
-1]);
554 struct check_loop_arg
556 struct qdisc_walker w
;
561 static int check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
);
563 static int check_loop(struct Qdisc
*q
, struct Qdisc
*p
, int depth
)
565 struct check_loop_arg arg
;
567 if (q
->ops
->cl_ops
== NULL
)
570 arg
.w
.stop
= arg
.w
.skip
= arg
.w
.count
= 0;
571 arg
.w
.fn
= check_loop_fn
;
574 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
575 return arg
.w
.stop
? -ELOOP
: 0;
579 check_loop_fn(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*w
)
582 struct Qdisc_class_ops
*cops
= q
->ops
->cl_ops
;
583 struct check_loop_arg
*arg
= (struct check_loop_arg
*)w
;
585 leaf
= cops
->leaf(q
, cl
);
587 if (leaf
== arg
->p
|| arg
->depth
> 7)
589 return check_loop(leaf
, arg
->p
, arg
->depth
+ 1);
598 static int tc_get_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
600 struct tcmsg
*tcm
= NLMSG_DATA(n
);
601 struct rtattr
**tca
= arg
;
602 struct net_device
*dev
;
603 u32 clid
= tcm
->tcm_parent
;
604 struct Qdisc
*q
= NULL
;
605 struct Qdisc
*p
= NULL
;
608 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
612 if (clid
!= TC_H_ROOT
) {
613 if (TC_H_MAJ(clid
) != TC_H_MAJ(TC_H_INGRESS
)) {
614 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
616 q
= qdisc_leaf(p
, clid
);
617 } else { /* ingress */
618 q
= dev
->qdisc_ingress
;
621 q
= dev
->qdisc_sleeping
;
626 if (tcm
->tcm_handle
&& q
->handle
!= tcm
->tcm_handle
)
629 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
633 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
636 if (n
->nlmsg_type
== RTM_DELQDISC
) {
641 if ((err
= qdisc_graft(dev
, p
, clid
, NULL
, &q
)) != 0)
644 qdisc_notify(skb
, n
, clid
, q
, NULL
);
645 qdisc_lock_tree(dev
);
647 qdisc_unlock_tree(dev
);
650 qdisc_notify(skb
, n
, clid
, NULL
, q
);
659 static int tc_modify_qdisc(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
663 struct net_device
*dev
;
669 /* Reinit, just in case something touches this. */
672 clid
= tcm
->tcm_parent
;
675 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
679 if (clid
!= TC_H_ROOT
) {
680 if (clid
!= TC_H_INGRESS
) {
681 if ((p
= qdisc_lookup(dev
, TC_H_MAJ(clid
))) == NULL
)
683 q
= qdisc_leaf(p
, clid
);
684 } else { /*ingress */
685 q
= dev
->qdisc_ingress
;
688 q
= dev
->qdisc_sleeping
;
691 /* It may be default qdisc, ignore it */
692 if (q
&& q
->handle
== 0)
695 if (!q
|| !tcm
->tcm_handle
|| q
->handle
!= tcm
->tcm_handle
) {
696 if (tcm
->tcm_handle
) {
697 if (q
&& !(n
->nlmsg_flags
&NLM_F_REPLACE
))
699 if (TC_H_MIN(tcm
->tcm_handle
))
701 if ((q
= qdisc_lookup(dev
, tcm
->tcm_handle
)) == NULL
)
703 if (n
->nlmsg_flags
&NLM_F_EXCL
)
705 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
708 (p
&& check_loop(q
, p
, 0)))
710 atomic_inc(&q
->refcnt
);
716 /* This magic test requires explanation.
718 * We know, that some child q is already
719 * attached to this parent and have choice:
720 * either to change it or to create/graft new one.
722 * 1. We are allowed to create/graft only
723 * if CREATE and REPLACE flags are set.
725 * 2. If EXCL is set, requestor wanted to say,
726 * that qdisc tcm_handle is not expected
727 * to exist, so that we choose create/graft too.
729 * 3. The last case is when no flags are set.
730 * Alas, it is sort of hole in API, we
731 * cannot decide what to do unambiguously.
732 * For now we select create/graft, if
733 * user gave KIND, which does not match existing.
735 if ((n
->nlmsg_flags
&NLM_F_CREATE
) &&
736 (n
->nlmsg_flags
&NLM_F_REPLACE
) &&
737 ((n
->nlmsg_flags
&NLM_F_EXCL
) ||
739 rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))))
744 if (!tcm
->tcm_handle
)
746 q
= qdisc_lookup(dev
, tcm
->tcm_handle
);
749 /* Change qdisc parameters */
752 if (n
->nlmsg_flags
&NLM_F_EXCL
)
754 if (tca
[TCA_KIND
-1] && rtattr_strcmp(tca
[TCA_KIND
-1], q
->ops
->id
))
756 err
= qdisc_change(q
, tca
);
758 qdisc_notify(skb
, n
, clid
, NULL
, q
);
762 if (!(n
->nlmsg_flags
&NLM_F_CREATE
))
764 if (clid
== TC_H_INGRESS
)
765 q
= qdisc_create(dev
, tcm
->tcm_parent
, tca
, &err
);
767 q
= qdisc_create(dev
, tcm
->tcm_handle
, tca
, &err
);
776 struct Qdisc
*old_q
= NULL
;
777 err
= qdisc_graft(dev
, p
, clid
, q
, &old_q
);
780 qdisc_lock_tree(dev
);
782 qdisc_unlock_tree(dev
);
786 qdisc_notify(skb
, n
, clid
, old_q
, q
);
788 qdisc_lock_tree(dev
);
789 qdisc_destroy(old_q
);
790 qdisc_unlock_tree(dev
);
796 static int tc_fill_qdisc(struct sk_buff
*skb
, struct Qdisc
*q
, u32 clid
,
797 u32 pid
, u32 seq
, u16 flags
, int event
)
800 struct nlmsghdr
*nlh
;
801 unsigned char *b
= skb_tail_pointer(skb
);
804 nlh
= NLMSG_NEW(skb
, pid
, seq
, event
, sizeof(*tcm
), flags
);
805 tcm
= NLMSG_DATA(nlh
);
806 tcm
->tcm_family
= AF_UNSPEC
;
809 tcm
->tcm_ifindex
= q
->dev
->ifindex
;
810 tcm
->tcm_parent
= clid
;
811 tcm
->tcm_handle
= q
->handle
;
812 tcm
->tcm_info
= atomic_read(&q
->refcnt
);
813 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
814 if (q
->ops
->dump
&& q
->ops
->dump(q
, skb
) < 0)
816 q
->qstats
.qlen
= q
->q
.qlen
;
818 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
,
819 TCA_XSTATS
, q
->stats_lock
, &d
) < 0)
822 if (q
->ops
->dump_stats
&& q
->ops
->dump_stats(q
, &d
) < 0)
825 if (gnet_stats_copy_basic(&d
, &q
->bstats
) < 0 ||
826 gnet_stats_copy_rate_est(&d
, &q
->rate_est
) < 0 ||
827 gnet_stats_copy_queue(&d
, &q
->qstats
) < 0)
830 if (gnet_stats_finish_copy(&d
) < 0)
833 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
842 static int qdisc_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
843 u32 clid
, struct Qdisc
*old
, struct Qdisc
*new)
846 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
848 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
852 if (old
&& old
->handle
) {
853 if (tc_fill_qdisc(skb
, old
, clid
, pid
, n
->nlmsg_seq
, 0, RTM_DELQDISC
) < 0)
857 if (tc_fill_qdisc(skb
, new, clid
, pid
, n
->nlmsg_seq
, old
? NLM_F_REPLACE
: 0, RTM_NEWQDISC
) < 0)
862 return rtnetlink_send(skb
, pid
, RTNLGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
869 static int tc_dump_qdisc(struct sk_buff
*skb
, struct netlink_callback
*cb
)
873 struct net_device
*dev
;
877 s_q_idx
= q_idx
= cb
->args
[1];
878 read_lock(&dev_base_lock
);
880 for_each_netdev(dev
) {
886 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
887 if (q_idx
< s_q_idx
) {
891 if (tc_fill_qdisc(skb
, q
, q
->parent
, NETLINK_CB(cb
->skb
).pid
,
892 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWQDISC
) <= 0)
901 read_unlock(&dev_base_lock
);
911 /************************************************
912 * Traffic classes manipulation. *
913 ************************************************/
917 static int tc_ctl_tclass(struct sk_buff
*skb
, struct nlmsghdr
*n
, void *arg
)
919 struct tcmsg
*tcm
= NLMSG_DATA(n
);
920 struct rtattr
**tca
= arg
;
921 struct net_device
*dev
;
922 struct Qdisc
*q
= NULL
;
923 struct Qdisc_class_ops
*cops
;
924 unsigned long cl
= 0;
925 unsigned long new_cl
;
926 u32 pid
= tcm
->tcm_parent
;
927 u32 clid
= tcm
->tcm_handle
;
928 u32 qid
= TC_H_MAJ(clid
);
931 if ((dev
= __dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
935 parent == TC_H_UNSPEC - unspecified parent.
936 parent == TC_H_ROOT - class is root, which has no parent.
937 parent == X:0 - parent is root class.
938 parent == X:Y - parent is a node in hierarchy.
939 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
941 handle == 0:0 - generate handle from kernel pool.
942 handle == 0:Y - class is X:Y, where X:0 is qdisc.
943 handle == X:Y - clear.
944 handle == X:0 - root class.
947 /* Step 1. Determine qdisc handle X:0 */
949 if (pid
!= TC_H_ROOT
) {
950 u32 qid1
= TC_H_MAJ(pid
);
953 /* If both majors are known, they must be identical. */
959 qid
= dev
->qdisc_sleeping
->handle
;
961 /* Now qid is genuine qdisc handle consistent
962 both with parent and child.
964 TC_H_MAJ(pid) still may be unspecified, complete it now.
967 pid
= TC_H_MAKE(qid
, pid
);
970 qid
= dev
->qdisc_sleeping
->handle
;
973 /* OK. Locate qdisc */
974 if ((q
= qdisc_lookup(dev
, qid
)) == NULL
)
977 /* An check that it supports classes */
978 cops
= q
->ops
->cl_ops
;
982 /* Now try to get class */
984 if (pid
== TC_H_ROOT
)
987 clid
= TC_H_MAKE(qid
, clid
);
990 cl
= cops
->get(q
, clid
);
994 if (n
->nlmsg_type
!= RTM_NEWTCLASS
|| !(n
->nlmsg_flags
&NLM_F_CREATE
))
997 switch (n
->nlmsg_type
) {
1000 if (n
->nlmsg_flags
&NLM_F_EXCL
)
1004 err
= cops
->delete(q
, cl
);
1006 tclass_notify(skb
, n
, q
, cl
, RTM_DELTCLASS
);
1009 err
= tclass_notify(skb
, n
, q
, cl
, RTM_NEWTCLASS
);
1018 err
= cops
->change(q
, clid
, pid
, tca
, &new_cl
);
1020 tclass_notify(skb
, n
, q
, new_cl
, RTM_NEWTCLASS
);
1030 static int tc_fill_tclass(struct sk_buff
*skb
, struct Qdisc
*q
,
1032 u32 pid
, u32 seq
, u16 flags
, int event
)
1035 struct nlmsghdr
*nlh
;
1036 unsigned char *b
= skb_tail_pointer(skb
);
1038 struct Qdisc_class_ops
*cl_ops
= q
->ops
->cl_ops
;
1040 nlh
= NLMSG_NEW(skb
, pid
, seq
, event
, sizeof(*tcm
), flags
);
1041 tcm
= NLMSG_DATA(nlh
);
1042 tcm
->tcm_family
= AF_UNSPEC
;
1043 tcm
->tcm_ifindex
= q
->dev
->ifindex
;
1044 tcm
->tcm_parent
= q
->handle
;
1045 tcm
->tcm_handle
= q
->handle
;
1047 RTA_PUT(skb
, TCA_KIND
, IFNAMSIZ
, q
->ops
->id
);
1048 if (cl_ops
->dump
&& cl_ops
->dump(q
, cl
, skb
, tcm
) < 0)
1049 goto rtattr_failure
;
1051 if (gnet_stats_start_copy_compat(skb
, TCA_STATS2
, TCA_STATS
,
1052 TCA_XSTATS
, q
->stats_lock
, &d
) < 0)
1053 goto rtattr_failure
;
1055 if (cl_ops
->dump_stats
&& cl_ops
->dump_stats(q
, cl
, &d
) < 0)
1056 goto rtattr_failure
;
1058 if (gnet_stats_finish_copy(&d
) < 0)
1059 goto rtattr_failure
;
1061 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
1070 static int tclass_notify(struct sk_buff
*oskb
, struct nlmsghdr
*n
,
1071 struct Qdisc
*q
, unsigned long cl
, int event
)
1073 struct sk_buff
*skb
;
1074 u32 pid
= oskb
? NETLINK_CB(oskb
).pid
: 0;
1076 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1080 if (tc_fill_tclass(skb
, q
, cl
, pid
, n
->nlmsg_seq
, 0, event
) < 0) {
1085 return rtnetlink_send(skb
, pid
, RTNLGRP_TC
, n
->nlmsg_flags
&NLM_F_ECHO
);
1088 struct qdisc_dump_args
1090 struct qdisc_walker w
;
1091 struct sk_buff
*skb
;
1092 struct netlink_callback
*cb
;
1095 static int qdisc_class_dump(struct Qdisc
*q
, unsigned long cl
, struct qdisc_walker
*arg
)
1097 struct qdisc_dump_args
*a
= (struct qdisc_dump_args
*)arg
;
1099 return tc_fill_tclass(a
->skb
, q
, cl
, NETLINK_CB(a
->cb
->skb
).pid
,
1100 a
->cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
, RTM_NEWTCLASS
);
1103 static int tc_dump_tclass(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1107 struct net_device
*dev
;
1109 struct tcmsg
*tcm
= (struct tcmsg
*)NLMSG_DATA(cb
->nlh
);
1110 struct qdisc_dump_args arg
;
1112 if (cb
->nlh
->nlmsg_len
< NLMSG_LENGTH(sizeof(*tcm
)))
1114 if ((dev
= dev_get_by_index(tcm
->tcm_ifindex
)) == NULL
)
1120 list_for_each_entry(q
, &dev
->qdisc_list
, list
) {
1121 if (t
< s_t
|| !q
->ops
->cl_ops
||
1123 TC_H_MAJ(tcm
->tcm_parent
) != q
->handle
)) {
1128 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
1129 arg
.w
.fn
= qdisc_class_dump
;
1133 arg
.w
.skip
= cb
->args
[1];
1135 q
->ops
->cl_ops
->walk(q
, &arg
.w
);
1136 cb
->args
[1] = arg
.w
.count
;
1148 /* Main classifier routine: scans classifier chain attached
1149 to this qdisc, (optionally) tests for protocol and asks
1150 specific classifiers.
1152 int tc_classify(struct sk_buff
*skb
, struct tcf_proto
*tp
,
1153 struct tcf_result
*res
)
1156 __be16 protocol
= skb
->protocol
;
1157 #ifdef CONFIG_NET_CLS_ACT
1158 struct tcf_proto
*otp
= tp
;
1161 protocol
= skb
->protocol
;
1163 for ( ; tp
; tp
= tp
->next
) {
1164 if ((tp
->protocol
== protocol
||
1165 tp
->protocol
== htons(ETH_P_ALL
)) &&
1166 (err
= tp
->classify(skb
, tp
, res
)) >= 0) {
1167 #ifdef CONFIG_NET_CLS_ACT
1168 if ( TC_ACT_RECLASSIFY
== err
) {
1169 __u32 verd
= (__u32
) G_TC_VERD(skb
->tc_verd
);
1172 if (MAX_REC_LOOP
< verd
++) {
1173 printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
1174 tp
->prio
&0xffff, ntohs(tp
->protocol
));
1177 skb
->tc_verd
= SET_TC_VERD(skb
->tc_verd
,verd
);
1181 skb
->tc_verd
= SET_TC_VERD(skb
->tc_verd
,0);
1194 void tcf_destroy(struct tcf_proto
*tp
)
1196 tp
->ops
->destroy(tp
);
1197 module_put(tp
->ops
->owner
);
1201 void tcf_destroy_chain(struct tcf_proto
*fl
)
1203 struct tcf_proto
*tp
;
1205 while ((tp
= fl
) != NULL
) {
1210 EXPORT_SYMBOL(tcf_destroy_chain
);
1212 #ifdef CONFIG_PROC_FS
1213 static int psched_show(struct seq_file
*seq
, void *v
)
1215 seq_printf(seq
, "%08x %08x %08x %08x\n",
1216 (u32
)NSEC_PER_USEC
, (u32
)PSCHED_US2NS(1),
1218 (u32
)NSEC_PER_SEC
/(u32
)ktime_to_ns(KTIME_MONOTONIC_RES
));
1223 static int psched_open(struct inode
*inode
, struct file
*file
)
1225 return single_open(file
, psched_show
, PDE(inode
)->data
);
1228 static const struct file_operations psched_fops
= {
1229 .owner
= THIS_MODULE
,
1230 .open
= psched_open
,
1232 .llseek
= seq_lseek
,
1233 .release
= single_release
,
1237 static int __init
pktsched_init(void)
1239 register_qdisc(&pfifo_qdisc_ops
);
1240 register_qdisc(&bfifo_qdisc_ops
);
1241 proc_net_fops_create("psched", 0, &psched_fops
);
1243 rtnl_register(PF_UNSPEC
, RTM_NEWQDISC
, tc_modify_qdisc
, NULL
);
1244 rtnl_register(PF_UNSPEC
, RTM_DELQDISC
, tc_get_qdisc
, NULL
);
1245 rtnl_register(PF_UNSPEC
, RTM_GETQDISC
, tc_get_qdisc
, tc_dump_qdisc
);
1246 rtnl_register(PF_UNSPEC
, RTM_NEWTCLASS
, tc_ctl_tclass
, NULL
);
1247 rtnl_register(PF_UNSPEC
, RTM_DELTCLASS
, tc_ctl_tclass
, NULL
);
1248 rtnl_register(PF_UNSPEC
, RTM_GETTCLASS
, tc_ctl_tclass
, tc_dump_tclass
);
1253 subsys_initcall(pktsched_init
);
1255 EXPORT_SYMBOL(qdisc_get_rtab
);
1256 EXPORT_SYMBOL(qdisc_put_rtab
);
1257 EXPORT_SYMBOL(register_qdisc
);
1258 EXPORT_SYMBOL(unregister_qdisc
);
1259 EXPORT_SYMBOL(tc_classify
);