pkt_sched: Grab correct lock in notify_and_destroy().
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in an order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   the information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform sanity
   checks and the parts of the work common to all qdiscs,
   and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.

   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by the policer.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues a packet that was dequeued once. It is used for non-standard
   or just buggy devices, which can defer output even when
   netif_queue_stopped() == 0.

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
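/*
 * For illustration, a minimal FIFO-style qdisc could implement the two
 * major routines as sketched below. This is only a sketch: the names
 * example_enqueue and example_dequeue are hypothetical and not part of
 * this file.
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 *	{
 *		if (skb_queue_len(&sch->q) >= qdisc_dev(sch)->tx_queue_len) {
 *			kfree_skb(skb);
 *			sch->qstats.drops++;
 *			return NET_XMIT_DROP;	<- this very packet was dropped
 *		}
 *		__skb_queue_tail(&sch->q, skb);
 *		return NET_XMIT_SUCCESS;	<- i.e. 0: enqueued successfully
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		NULL here need not mean "empty"; only sch->q.qlen == 0 does.
 *
 *		return __skb_dequeue(&sch->q);
 *	}
 */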
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->requeue == NULL)
		qops->requeue = noop_qdisc_ops.requeue;
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_qdisc);
int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);
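/*
 * Typical usage from a scheduler module (a sketch; "example_qdisc_ops"
 * and the two routines sketched above are hypothetical names):
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 *
 * register_qdisc() returns -EEXIST if a discipline with the same id is
 * already registered.
 */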
/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children etc.)
 */

struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	list_for_each_entry(q, &root->list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		struct Qdisc *q, *txq_root = txq->qdisc_sleeping;

		q = qdisc_match_from_root(txq_root, handle);
		if (q)
			return q;
	}
	return qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
}
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
static LIST_HEAD(qdisc_stab_list);
static DEFINE_SPINLOCK(qdisc_stab_lock);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (!s || tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock_bh(&qdisc_stab_lock);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock_bh(&qdisc_stab_lock);
		return stab;
	}

	spin_unlock_bh(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock_bh(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock_bh(&qdisc_stab_lock);

	return stab;
}
void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock_bh(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree(tab);
	}

	spin_unlock_bh(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}
void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(qdisc_calculate_pkt_len);
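/*
 * Worked example with hypothetical size-table parameters: overhead = 4,
 * cell_align = 0, cell_log = 6, size_log = 0 and tsize = 2. A 120-byte
 * skb gives pkt_len = 124 and slot = 124 >> 6 = 1, so the estimated
 * length becomes stab->data[1]. A slot beyond the table is extrapolated
 * from the last entry: slot = 5 yields
 * data[1] * (5 / 2) + data[5 % 2] = 2 * data[1] + data[1].
 */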
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	smp_wmb();
	__netif_schedule(wd->qdisc);

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
	ktime_t time;

	wd->qdisc->flags |= TCQ_F_THROTTLED;
	time = ktime_set(0, 0);
	time = ktime_add_ns(time, PSCHED_US2NS(expires));
	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
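/*
 * A rate-limiting qdisc typically arms the watchdog from its dequeue
 * routine when the head packet is not yet allowed to leave, for example
 * (a sketch with hypothetical names; compare sch_tbf.c):
 *
 *	if (next_xmit_time > now) {
 *		qdisc_watchdog_schedule(&q->watchdog, next_xmit_time);
 *		return NULL;	<- throttled, but not empty
 *	}
 */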
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *n, *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
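/*
 * The load-factor test in qdisc_class_hash_grow() is integer arithmetic
 * for hashelems / hashsize > 3/4: e.g. with hashsize = 4, the table is
 * doubled when the fourth class is inserted (4 * 4 > 4 * 3).
 */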
int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
/* Allocate a unique handle from space managed by kernel */

static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while	(qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}
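/*
 * Handles are 32-bit <major:minor> pairs built with TC_H_MAKE(). The
 * first automatic handle is TC_H_MAKE(0x80010000, 0), i.e. "8001:" in
 * tc(8) notation; allocation steps by one major (0x10000) per call and
 * wraps back to major 0x8000 when it would reach major 0xffff, the
 * major of TC_H_ROOT.
 */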
/* Attach toplevel qdisc to device queue. */

static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
				     struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_root_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	dev_queue->qdisc = &noop_qdisc;

	spin_unlock_bh(root_lock);

	return oqdisc;
}
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(skb, n, clid, old, new);

	if (old) {
		sch_tree_lock(old);
		qdisc_destroy(old);
		sch_tree_unlock(old);
	}
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = &dev->rx_queue;

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			notify_and_destroy(skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			}
		}
		if (!err)
			notify_and_destroy(skb, n, classid, old, new);
	}
	return err;
}
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try qdisc_lookup_ops again on
				 * the replay, so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out3;
			}
			sch->stab = stab;
		}
		if (tca[TCA_RATE]) {
			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						qdisc_root_lock(sch),
						tca[TCA_RATE]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * a ops->reset() here? The qdisc was never
				 * in action so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}
		if ((parent != TC_H_ROOT) && !(sch->flags & TCQ_F_INGRESS))
			list_add_tail(&sch->list, &dev_queue->qdisc_sleeping->list);

		return sch;
	}
err_out3:
	qdisc_put_stab(sch->stab);
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	qdisc_put_stab(sch->stab);
	sch->stab = stab;

	if (tca[TCA_RATE])
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      qdisc_root_lock(sch), tca[TCA_RATE]);
	return 0;
}
struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}
/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
			return err;
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
/*
   Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (net != &init_net)
		return -EINVAL;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->rx_queue.qdisc_sleeping;
			}
		} else {
			struct netdev_queue *dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);
			q = dev_queue->qdisc_sleeping;
		}

		/* It may be the default qdisc; ignore it. */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   either to change it or to create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requestor wanted to say
				 *   that this qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, it is a sort of hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft if the
				 *   user gave a KIND which does not match the
				 *   existing one.
				 */
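				/* The same decision in tabular form, for an
				 * existing child qdisc q:
				 *
				 *   CREATE REPLACE EXCL  KIND differs => action
				 *   set    set     set   any          => create/graft
				 *   set    set     unset yes          => create/graft
				 *   any other combination             => change q
				 */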
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, &dev->rx_queue,
				 tcm->tcm_parent, tcm->tcm_parent,
				 tca, &err);
	else
		q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	if (1) {
		spinlock_t *root_lock;

		err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
		if (err) {
			if (q) {
				root_lock = qdisc_root_lock(q);
				spin_lock_bh(root_lock);
				qdisc_destroy(q);
				spin_unlock_bh(root_lock);
			}
			return err;
		}
	}
	return 0;
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
	return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	if (net != &init_net)
		return 0;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(&init_net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		dev_queue = netdev_get_tx_queue(dev, 0);
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = &dev->rx_queue;
		if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - fully specified.
	   handle == X:0	 - root class.
	 */
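	/* Example: "parent 1:2, handle 0:4" arrives as pid = 0x10002 and
	 * clid = 0x4. Step 1 below completes the missing major from the
	 * parent, so qid becomes 0x10000; later clid is completed to
	 * TC_H_MAKE(0x10000, 4), i.e. class 1:4 of qdisc 1:0.
	 */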
	/* Step 1. Determine qdisc handle X:0 */

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;

		/* Now qid is a genuine qdisc handle consistent with
		   both parent and child.

		   TC_H_MAJ(pid) may still be unspecified; complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev_queue->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, qdisc_root_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO);
}
struct qdisc_dump_args
{
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	list_for_each_entry(q, &root->list, list) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	dev_queue = netdev_get_tx_queue(dev, 0);
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = &dev->rx_queue;
	if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
/* Main classifier routine: scans the classifier chain attached
   to this qdisc, (optionally) tests for the protocol and asks
   specific classifiers.
 */
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
		       struct tcf_result *res)
{
	__be16 protocol = skb->protocol;
	int err = 0;

	for (; tp; tp = tp->next) {
		if ((tp->protocol == protocol ||
		     tp->protocol == htons(ETH_P_ALL)) &&
		    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
#endif
			return err;
		}
	}
	return -1;
}
EXPORT_SYMBOL(tc_classify_compat);
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
	__be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif
	protocol = skb->protocol;

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio & 0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
void tcf_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree(tp);
}

void tcf_destroy_chain(struct tcf_proto **fl)
{
	struct tcf_proto *tp;

	while ((tp = *fl) != NULL) {
		*fl = tp->next;
		tcf_destroy(tp);
	}
}
EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / (u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);