Driver Core: devtmpfs - kernel-maintained tmpfs-based /dev
[linux/fpc-iii.git] / net / sched / sch_api.c
blob692d9a41cd23e714f77eda461323962e11c3a7b8
1 /*
2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 * Fixes:
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
32 #include <net/net_namespace.h>
33 #include <net/sock.h>
34 #include <net/netlink.h>
35 #include <net/pkt_sched.h>
37 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
38 struct Qdisc *old, struct Qdisc *new);
39 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
40 struct Qdisc *q, unsigned long cl, int event);
44 Short review.
45 -------------
47 This file consists of two interrelated parts:
49 1. queueing disciplines manager frontend.
50 2. traffic classes manager frontend.
52 Generally, queueing discipline ("qdisc") is a black box,
53 which is able to enqueue packets and to dequeue them (when
54 device is ready to send something) in order and at times
55 determined by algorithm hidden in it.
57 Qdiscs are divided into two categories:
58 - "queues", which have no internal structure visible from outside.
59 - "schedulers", which split all the packets to "traffic classes",
60 using "packet classifiers" (look at cls_api.c)
62 In turn, classes may have child qdiscs (as rule, queues)
63 attached to them etc. etc. etc.
65 The goal of the routines in this file is to translate
66 information supplied by the user in the form of handles
67 into a form more intelligible to the kernel, to perform some sanity
68 checks and the part of the work that is common to all qdiscs,
69 and to provide rtnetlink notifications.
71 All real intelligent work is done inside qdisc modules.
75 Every discipline has two major routines: enqueue and dequeue.
77 ---dequeue
79 dequeue usually returns a skb to send. It is allowed to return NULL,
80 but it does not mean that queue is empty, it just means that
81 discipline does not want to send anything this time.
82 Queue is really empty if q->q.qlen == 0.
83 For complicated disciplines with multiple queues q->q is not
84 real packet queue, but however q->q.qlen must be valid.
86 ---enqueue
88 enqueue returns 0, if packet was enqueued successfully.
89 If a packet (this one or another one) was dropped, it returns
90 a non-zero error code.
91 NET_XMIT_DROP - this packet dropped
92 Expected action: do not backoff, but wait until queue will clear.
93 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
94 Expected action: backoff or ignore
95 NET_XMIT_POLICED - dropped by police.
96 Expected action: backoff or error to real-time apps.
98 Auxiliary routines:
100 ---peek
102 like dequeue but without removing a packet from the queue
104 ---reset
106 returns qdisc to initial state: purge all buffers, clear all
107 timers, counters (except for statistics) etc.
109 ---init
111 initializes newly created qdisc.
113 ---destroy
115 destroys resources allocated by init and during lifetime of qdisc.
117 ---change
119 changes qdisc parameters.
/*
 * Guards the list of registered qdisc ops below: registration and
 * unregistration take it for writing, lookups by name take it for reading.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* Head of the singly linked list (chained via ->next) of registered qdisc ops. */
static struct Qdisc_ops *qdisc_base;
135 /* Register/uregister queueing discipline */
137 int register_qdisc(struct Qdisc_ops *qops)
139 struct Qdisc_ops *q, **qp;
140 int rc = -EEXIST;
142 write_lock(&qdisc_mod_lock);
143 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
144 if (!strcmp(qops->id, q->id))
145 goto out;
147 if (qops->enqueue == NULL)
148 qops->enqueue = noop_qdisc_ops.enqueue;
149 if (qops->peek == NULL) {
150 if (qops->dequeue == NULL) {
151 qops->peek = noop_qdisc_ops.peek;
152 } else {
153 rc = -EINVAL;
154 goto out;
157 if (qops->dequeue == NULL)
158 qops->dequeue = noop_qdisc_ops.dequeue;
160 qops->next = NULL;
161 *qp = qops;
162 rc = 0;
163 out:
164 write_unlock(&qdisc_mod_lock);
165 return rc;
167 EXPORT_SYMBOL(register_qdisc);
169 int unregister_qdisc(struct Qdisc_ops *qops)
171 struct Qdisc_ops *q, **qp;
172 int err = -ENOENT;
174 write_lock(&qdisc_mod_lock);
175 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
176 if (q == qops)
177 break;
178 if (q) {
179 *qp = q->next;
180 q->next = NULL;
181 err = 0;
183 write_unlock(&qdisc_mod_lock);
184 return err;
186 EXPORT_SYMBOL(unregister_qdisc);
188 /* We know handle. Find qdisc among all qdisc's attached to device
189 (root qdisc, all its children, children of children etc.)
192 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
194 struct Qdisc *q;
196 if (!(root->flags & TCQ_F_BUILTIN) &&
197 root->handle == handle)
198 return root;
200 list_for_each_entry(q, &root->list, list) {
201 if (q->handle == handle)
202 return q;
204 return NULL;
207 static void qdisc_list_add(struct Qdisc *q)
209 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
210 list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
213 void qdisc_list_del(struct Qdisc *q)
215 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
216 list_del(&q->list);
218 EXPORT_SYMBOL(qdisc_list_del);
220 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
222 struct Qdisc *q;
224 q = qdisc_match_from_root(dev->qdisc, handle);
225 if (q)
226 goto out;
228 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
229 out:
230 return q;
233 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
235 unsigned long cl;
236 struct Qdisc *leaf;
237 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
239 if (cops == NULL)
240 return NULL;
241 cl = cops->get(p, classid);
243 if (cl == 0)
244 return NULL;
245 leaf = cops->leaf(p, cl);
246 cops->put(p, cl);
247 return leaf;
250 /* Find queueing discipline by name */
252 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
254 struct Qdisc_ops *q = NULL;
256 if (kind) {
257 read_lock(&qdisc_mod_lock);
258 for (q = qdisc_base; q; q = q->next) {
259 if (nla_strcmp(kind, q->id) == 0) {
260 if (!try_module_get(q->owner))
261 q = NULL;
262 break;
265 read_unlock(&qdisc_mod_lock);
267 return q;
270 static struct qdisc_rate_table *qdisc_rtab_list;
272 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
274 struct qdisc_rate_table *rtab;
276 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
277 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
278 rtab->refcnt++;
279 return rtab;
283 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
284 nla_len(tab) != TC_RTAB_SIZE)
285 return NULL;
287 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
288 if (rtab) {
289 rtab->rate = *r;
290 rtab->refcnt = 1;
291 memcpy(rtab->data, nla_data(tab), 1024);
292 rtab->next = qdisc_rtab_list;
293 qdisc_rtab_list = rtab;
295 return rtab;
297 EXPORT_SYMBOL(qdisc_get_rtab);
299 void qdisc_put_rtab(struct qdisc_rate_table *tab)
301 struct qdisc_rate_table *rtab, **rtabp;
303 if (!tab || --tab->refcnt)
304 return;
306 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
307 if (rtab == tab) {
308 *rtabp = rtab->next;
309 kfree(rtab);
310 return;
314 EXPORT_SYMBOL(qdisc_put_rtab);
316 static LIST_HEAD(qdisc_stab_list);
317 static DEFINE_SPINLOCK(qdisc_stab_lock);
319 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
320 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
321 [TCA_STAB_DATA] = { .type = NLA_BINARY },
324 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
326 struct nlattr *tb[TCA_STAB_MAX + 1];
327 struct qdisc_size_table *stab;
328 struct tc_sizespec *s;
329 unsigned int tsize = 0;
330 u16 *tab = NULL;
331 int err;
333 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
334 if (err < 0)
335 return ERR_PTR(err);
336 if (!tb[TCA_STAB_BASE])
337 return ERR_PTR(-EINVAL);
339 s = nla_data(tb[TCA_STAB_BASE]);
341 if (s->tsize > 0) {
342 if (!tb[TCA_STAB_DATA])
343 return ERR_PTR(-EINVAL);
344 tab = nla_data(tb[TCA_STAB_DATA]);
345 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
348 if (!s || tsize != s->tsize || (!tab && tsize > 0))
349 return ERR_PTR(-EINVAL);
351 spin_lock(&qdisc_stab_lock);
353 list_for_each_entry(stab, &qdisc_stab_list, list) {
354 if (memcmp(&stab->szopts, s, sizeof(*s)))
355 continue;
356 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
357 continue;
358 stab->refcnt++;
359 spin_unlock(&qdisc_stab_lock);
360 return stab;
363 spin_unlock(&qdisc_stab_lock);
365 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
366 if (!stab)
367 return ERR_PTR(-ENOMEM);
369 stab->refcnt = 1;
370 stab->szopts = *s;
371 if (tsize > 0)
372 memcpy(stab->data, tab, tsize * sizeof(u16));
374 spin_lock(&qdisc_stab_lock);
375 list_add_tail(&stab->list, &qdisc_stab_list);
376 spin_unlock(&qdisc_stab_lock);
378 return stab;
381 void qdisc_put_stab(struct qdisc_size_table *tab)
383 if (!tab)
384 return;
386 spin_lock(&qdisc_stab_lock);
388 if (--tab->refcnt == 0) {
389 list_del(&tab->list);
390 kfree(tab);
393 spin_unlock(&qdisc_stab_lock);
395 EXPORT_SYMBOL(qdisc_put_stab);
397 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
399 struct nlattr *nest;
401 nest = nla_nest_start(skb, TCA_STAB);
402 if (nest == NULL)
403 goto nla_put_failure;
404 NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
405 nla_nest_end(skb, nest);
407 return skb->len;
409 nla_put_failure:
410 return -1;
413 void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
415 int pkt_len, slot;
417 pkt_len = skb->len + stab->szopts.overhead;
418 if (unlikely(!stab->szopts.tsize))
419 goto out;
421 slot = pkt_len + stab->szopts.cell_align;
422 if (unlikely(slot < 0))
423 slot = 0;
425 slot >>= stab->szopts.cell_log;
426 if (likely(slot < stab->szopts.tsize))
427 pkt_len = stab->data[slot];
428 else
429 pkt_len = stab->data[stab->szopts.tsize - 1] *
430 (slot / stab->szopts.tsize) +
431 stab->data[slot % stab->szopts.tsize];
433 pkt_len <<= stab->szopts.size_log;
434 out:
435 if (unlikely(pkt_len < 1))
436 pkt_len = 1;
437 qdisc_skb_cb(skb)->pkt_len = pkt_len;
439 EXPORT_SYMBOL(qdisc_calculate_pkt_len);
441 void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
443 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
444 printk(KERN_WARNING
445 "%s: %s qdisc %X: is non-work-conserving?\n",
446 txt, qdisc->ops->id, qdisc->handle >> 16);
447 qdisc->flags |= TCQ_F_WARN_NONWC;
450 EXPORT_SYMBOL(qdisc_warn_nonwc);
452 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
454 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
455 timer);
457 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
458 __netif_schedule(qdisc_root(wd->qdisc));
460 return HRTIMER_NORESTART;
463 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
465 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
466 wd->timer.function = qdisc_watchdog;
467 wd->qdisc = qdisc;
469 EXPORT_SYMBOL(qdisc_watchdog_init);
471 void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
473 ktime_t time;
475 if (test_bit(__QDISC_STATE_DEACTIVATED,
476 &qdisc_root_sleeping(wd->qdisc)->state))
477 return;
479 wd->qdisc->flags |= TCQ_F_THROTTLED;
480 time = ktime_set(0, 0);
481 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
482 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
484 EXPORT_SYMBOL(qdisc_watchdog_schedule);
486 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
488 hrtimer_cancel(&wd->timer);
489 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
491 EXPORT_SYMBOL(qdisc_watchdog_cancel);
493 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
495 unsigned int size = n * sizeof(struct hlist_head), i;
496 struct hlist_head *h;
498 if (size <= PAGE_SIZE)
499 h = kmalloc(size, GFP_KERNEL);
500 else
501 h = (struct hlist_head *)
502 __get_free_pages(GFP_KERNEL, get_order(size));
504 if (h != NULL) {
505 for (i = 0; i < n; i++)
506 INIT_HLIST_HEAD(&h[i]);
508 return h;
511 static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
513 unsigned int size = n * sizeof(struct hlist_head);
515 if (size <= PAGE_SIZE)
516 kfree(h);
517 else
518 free_pages((unsigned long)h, get_order(size));
521 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
523 struct Qdisc_class_common *cl;
524 struct hlist_node *n, *next;
525 struct hlist_head *nhash, *ohash;
526 unsigned int nsize, nmask, osize;
527 unsigned int i, h;
529 /* Rehash when load factor exceeds 0.75 */
530 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
531 return;
532 nsize = clhash->hashsize * 2;
533 nmask = nsize - 1;
534 nhash = qdisc_class_hash_alloc(nsize);
535 if (nhash == NULL)
536 return;
538 ohash = clhash->hash;
539 osize = clhash->hashsize;
541 sch_tree_lock(sch);
542 for (i = 0; i < osize; i++) {
543 hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
544 h = qdisc_class_hash(cl->classid, nmask);
545 hlist_add_head(&cl->hnode, &nhash[h]);
548 clhash->hash = nhash;
549 clhash->hashsize = nsize;
550 clhash->hashmask = nmask;
551 sch_tree_unlock(sch);
553 qdisc_class_hash_free(ohash, osize);
555 EXPORT_SYMBOL(qdisc_class_hash_grow);
557 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
559 unsigned int size = 4;
561 clhash->hash = qdisc_class_hash_alloc(size);
562 if (clhash->hash == NULL)
563 return -ENOMEM;
564 clhash->hashsize = size;
565 clhash->hashmask = size - 1;
566 clhash->hashelems = 0;
567 return 0;
569 EXPORT_SYMBOL(qdisc_class_hash_init);
571 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
573 qdisc_class_hash_free(clhash->hash, clhash->hashsize);
575 EXPORT_SYMBOL(qdisc_class_hash_destroy);
577 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
578 struct Qdisc_class_common *cl)
580 unsigned int h;
582 INIT_HLIST_NODE(&cl->hnode);
583 h = qdisc_class_hash(cl->classid, clhash->hashmask);
584 hlist_add_head(&cl->hnode, &clhash->hash[h]);
585 clhash->hashelems++;
587 EXPORT_SYMBOL(qdisc_class_hash_insert);
589 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
590 struct Qdisc_class_common *cl)
592 hlist_del(&cl->hnode);
593 clhash->hashelems--;
595 EXPORT_SYMBOL(qdisc_class_hash_remove);
597 /* Allocate an unique handle from space managed by kernel */
599 static u32 qdisc_alloc_handle(struct net_device *dev)
601 int i = 0x10000;
602 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
604 do {
605 autohandle += TC_H_MAKE(0x10000U, 0);
606 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
607 autohandle = TC_H_MAKE(0x80000000U, 0);
608 } while (qdisc_lookup(dev, autohandle) && --i > 0);
610 return i>0 ? autohandle : 0;
613 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
615 const struct Qdisc_class_ops *cops;
616 unsigned long cl;
617 u32 parentid;
619 if (n == 0)
620 return;
621 while ((parentid = sch->parent)) {
622 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
623 return;
625 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
626 if (sch == NULL) {
627 WARN_ON(parentid != TC_H_ROOT);
628 return;
630 cops = sch->ops->cl_ops;
631 if (cops->qlen_notify) {
632 cl = cops->get(sch, parentid);
633 cops->qlen_notify(sch, cl);
634 cops->put(sch, cl);
636 sch->q.qlen -= n;
639 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
641 static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
642 struct Qdisc *old, struct Qdisc *new)
644 if (new || old)
645 qdisc_notify(skb, n, clid, old, new);
647 if (old)
648 qdisc_destroy(old);
651 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
652 * to device "dev".
654 * When appropriate send a netlink notification using 'skb'
655 * and "n".
657 * On success, destroy old qdisc.
660 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
661 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
662 struct Qdisc *new, struct Qdisc *old)
664 struct Qdisc *q = old;
665 int err = 0;
667 if (parent == NULL) {
668 unsigned int i, num_q, ingress;
670 ingress = 0;
671 num_q = dev->num_tx_queues;
672 if ((q && q->flags & TCQ_F_INGRESS) ||
673 (new && new->flags & TCQ_F_INGRESS)) {
674 num_q = 1;
675 ingress = 1;
678 if (dev->flags & IFF_UP)
679 dev_deactivate(dev);
681 if (new && new->ops->attach) {
682 new->ops->attach(new);
683 num_q = 0;
686 for (i = 0; i < num_q; i++) {
687 struct netdev_queue *dev_queue = &dev->rx_queue;
689 if (!ingress)
690 dev_queue = netdev_get_tx_queue(dev, i);
692 old = dev_graft_qdisc(dev_queue, new);
693 if (new && i > 0)
694 atomic_inc(&new->refcnt);
696 qdisc_destroy(old);
699 notify_and_destroy(skb, n, classid, dev->qdisc, new);
700 if (new && !new->ops->attach)
701 atomic_inc(&new->refcnt);
702 dev->qdisc = new ? : &noop_qdisc;
704 if (dev->flags & IFF_UP)
705 dev_activate(dev);
706 } else {
707 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
709 err = -EOPNOTSUPP;
710 if (cops && cops->graft) {
711 unsigned long cl = cops->get(parent, classid);
712 if (cl) {
713 err = cops->graft(parent, cl, new, &old);
714 cops->put(parent, cl);
715 } else
716 err = -ENOENT;
718 if (!err)
719 notify_and_destroy(skb, n, classid, old, new);
721 return err;
724 /* lockdep annotation is needed for ingress; egress gets it only for name */
725 static struct lock_class_key qdisc_tx_lock;
726 static struct lock_class_key qdisc_rx_lock;
729 Allocate and initialize new qdisc.
731 Parameters are passed via opt.
734 static struct Qdisc *
735 qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
736 struct Qdisc *p, u32 parent, u32 handle,
737 struct nlattr **tca, int *errp)
739 int err;
740 struct nlattr *kind = tca[TCA_KIND];
741 struct Qdisc *sch;
742 struct Qdisc_ops *ops;
743 struct qdisc_size_table *stab;
745 ops = qdisc_lookup_ops(kind);
746 #ifdef CONFIG_MODULES
747 if (ops == NULL && kind != NULL) {
748 char name[IFNAMSIZ];
749 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
750 /* We dropped the RTNL semaphore in order to
751 * perform the module load. So, even if we
752 * succeeded in loading the module we have to
753 * tell the caller to replay the request. We
754 * indicate this using -EAGAIN.
755 * We replay the request because the device may
756 * go away in the mean time.
758 rtnl_unlock();
759 request_module("sch_%s", name);
760 rtnl_lock();
761 ops = qdisc_lookup_ops(kind);
762 if (ops != NULL) {
763 /* We will try again qdisc_lookup_ops,
764 * so don't keep a reference.
766 module_put(ops->owner);
767 err = -EAGAIN;
768 goto err_out;
772 #endif
774 err = -ENOENT;
775 if (ops == NULL)
776 goto err_out;
778 sch = qdisc_alloc(dev_queue, ops);
779 if (IS_ERR(sch)) {
780 err = PTR_ERR(sch);
781 goto err_out2;
784 sch->parent = parent;
786 if (handle == TC_H_INGRESS) {
787 sch->flags |= TCQ_F_INGRESS;
788 handle = TC_H_MAKE(TC_H_INGRESS, 0);
789 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
790 } else {
791 if (handle == 0) {
792 handle = qdisc_alloc_handle(dev);
793 err = -ENOMEM;
794 if (handle == 0)
795 goto err_out3;
797 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
800 sch->handle = handle;
802 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
803 if (tca[TCA_STAB]) {
804 stab = qdisc_get_stab(tca[TCA_STAB]);
805 if (IS_ERR(stab)) {
806 err = PTR_ERR(stab);
807 goto err_out3;
809 sch->stab = stab;
811 if (tca[TCA_RATE]) {
812 spinlock_t *root_lock;
814 err = -EOPNOTSUPP;
815 if (sch->flags & TCQ_F_MQROOT)
816 goto err_out4;
818 if ((sch->parent != TC_H_ROOT) &&
819 !(sch->flags & TCQ_F_INGRESS) &&
820 (!p || !(p->flags & TCQ_F_MQROOT)))
821 root_lock = qdisc_root_sleeping_lock(sch);
822 else
823 root_lock = qdisc_lock(sch);
825 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
826 root_lock, tca[TCA_RATE]);
827 if (err)
828 goto err_out4;
831 qdisc_list_add(sch);
833 return sch;
835 err_out3:
836 qdisc_put_stab(sch->stab);
837 dev_put(dev);
838 kfree((char *) sch - sch->padded);
839 err_out2:
840 module_put(ops->owner);
841 err_out:
842 *errp = err;
843 return NULL;
845 err_out4:
847 * Any broken qdiscs that would require a ops->reset() here?
848 * The qdisc was never in action so it shouldn't be necessary.
850 if (ops->destroy)
851 ops->destroy(sch);
852 goto err_out3;
855 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
857 struct qdisc_size_table *stab = NULL;
858 int err = 0;
860 if (tca[TCA_OPTIONS]) {
861 if (sch->ops->change == NULL)
862 return -EINVAL;
863 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
864 if (err)
865 return err;
868 if (tca[TCA_STAB]) {
869 stab = qdisc_get_stab(tca[TCA_STAB]);
870 if (IS_ERR(stab))
871 return PTR_ERR(stab);
874 qdisc_put_stab(sch->stab);
875 sch->stab = stab;
877 if (tca[TCA_RATE]) {
878 /* NB: ignores errors from replace_estimator
879 because change can't be undone. */
880 if (sch->flags & TCQ_F_MQROOT)
881 goto out;
882 gen_replace_estimator(&sch->bstats, &sch->rate_est,
883 qdisc_root_sleeping_lock(sch),
884 tca[TCA_RATE]);
886 out:
887 return 0;
890 struct check_loop_arg
892 struct qdisc_walker w;
893 struct Qdisc *p;
894 int depth;
897 static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
899 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
901 struct check_loop_arg arg;
903 if (q->ops->cl_ops == NULL)
904 return 0;
906 arg.w.stop = arg.w.skip = arg.w.count = 0;
907 arg.w.fn = check_loop_fn;
908 arg.depth = depth;
909 arg.p = p;
910 q->ops->cl_ops->walk(q, &arg.w);
911 return arg.w.stop ? -ELOOP : 0;
914 static int
915 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
917 struct Qdisc *leaf;
918 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
919 struct check_loop_arg *arg = (struct check_loop_arg *)w;
921 leaf = cops->leaf(q, cl);
922 if (leaf) {
923 if (leaf == arg->p || arg->depth > 7)
924 return -ELOOP;
925 return check_loop(leaf, arg->p, arg->depth + 1);
927 return 0;
931 * Delete/get qdisc.
934 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
936 struct net *net = sock_net(skb->sk);
937 struct tcmsg *tcm = NLMSG_DATA(n);
938 struct nlattr *tca[TCA_MAX + 1];
939 struct net_device *dev;
940 u32 clid = tcm->tcm_parent;
941 struct Qdisc *q = NULL;
942 struct Qdisc *p = NULL;
943 int err;
945 if (net != &init_net)
946 return -EINVAL;
948 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
949 return -ENODEV;
951 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
952 if (err < 0)
953 return err;
955 if (clid) {
956 if (clid != TC_H_ROOT) {
957 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
958 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
959 return -ENOENT;
960 q = qdisc_leaf(p, clid);
961 } else { /* ingress */
962 q = dev->rx_queue.qdisc_sleeping;
964 } else {
965 q = dev->qdisc;
967 if (!q)
968 return -ENOENT;
970 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
971 return -EINVAL;
972 } else {
973 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
974 return -ENOENT;
977 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
978 return -EINVAL;
980 if (n->nlmsg_type == RTM_DELQDISC) {
981 if (!clid)
982 return -EINVAL;
983 if (q->handle == 0)
984 return -ENOENT;
985 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
986 return err;
987 } else {
988 qdisc_notify(skb, n, clid, NULL, q);
990 return 0;
994 Create/change qdisc.
997 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
999 struct net *net = sock_net(skb->sk);
1000 struct tcmsg *tcm;
1001 struct nlattr *tca[TCA_MAX + 1];
1002 struct net_device *dev;
1003 u32 clid;
1004 struct Qdisc *q, *p;
1005 int err;
1007 if (net != &init_net)
1008 return -EINVAL;
1010 replay:
1011 /* Reinit, just in case something touches this. */
1012 tcm = NLMSG_DATA(n);
1013 clid = tcm->tcm_parent;
1014 q = p = NULL;
1016 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1017 return -ENODEV;
1019 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1020 if (err < 0)
1021 return err;
1023 if (clid) {
1024 if (clid != TC_H_ROOT) {
1025 if (clid != TC_H_INGRESS) {
1026 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
1027 return -ENOENT;
1028 q = qdisc_leaf(p, clid);
1029 } else { /*ingress */
1030 q = dev->rx_queue.qdisc_sleeping;
1032 } else {
1033 q = dev->qdisc;
1036 /* It may be default qdisc, ignore it */
1037 if (q && q->handle == 0)
1038 q = NULL;
1040 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1041 if (tcm->tcm_handle) {
1042 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
1043 return -EEXIST;
1044 if (TC_H_MIN(tcm->tcm_handle))
1045 return -EINVAL;
1046 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
1047 goto create_n_graft;
1048 if (n->nlmsg_flags&NLM_F_EXCL)
1049 return -EEXIST;
1050 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1051 return -EINVAL;
1052 if (q == p ||
1053 (p && check_loop(q, p, 0)))
1054 return -ELOOP;
1055 atomic_inc(&q->refcnt);
1056 goto graft;
1057 } else {
1058 if (q == NULL)
1059 goto create_n_graft;
1061 /* This magic test requires explanation.
1063 * We know, that some child q is already
1064 * attached to this parent and have choice:
1065 * either to change it or to create/graft new one.
1067 * 1. We are allowed to create/graft only
1068 * if CREATE and REPLACE flags are set.
1070 * 2. If EXCL is set, requestor wanted to say,
1071 * that qdisc tcm_handle is not expected
1072 * to exist, so that we choose create/graft too.
1074 * 3. The last case is when no flags are set.
1075 * Alas, it is sort of hole in API, we
1076 * cannot decide what to do unambiguously.
1077 * For now we select create/graft, if
1078 * user gave KIND, which does not match existing.
1080 if ((n->nlmsg_flags&NLM_F_CREATE) &&
1081 (n->nlmsg_flags&NLM_F_REPLACE) &&
1082 ((n->nlmsg_flags&NLM_F_EXCL) ||
1083 (tca[TCA_KIND] &&
1084 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1085 goto create_n_graft;
1088 } else {
1089 if (!tcm->tcm_handle)
1090 return -EINVAL;
1091 q = qdisc_lookup(dev, tcm->tcm_handle);
1094 /* Change qdisc parameters */
1095 if (q == NULL)
1096 return -ENOENT;
1097 if (n->nlmsg_flags&NLM_F_EXCL)
1098 return -EEXIST;
1099 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1100 return -EINVAL;
1101 err = qdisc_change(q, tca);
1102 if (err == 0)
1103 qdisc_notify(skb, n, clid, NULL, q);
1104 return err;
1106 create_n_graft:
1107 if (!(n->nlmsg_flags&NLM_F_CREATE))
1108 return -ENOENT;
1109 if (clid == TC_H_INGRESS)
1110 q = qdisc_create(dev, &dev->rx_queue, p,
1111 tcm->tcm_parent, tcm->tcm_parent,
1112 tca, &err);
1113 else {
1114 unsigned int ntx = 0;
1116 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1117 ntx = p->ops->cl_ops->select_queue(p, tcm);
1119 q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), p,
1120 tcm->tcm_parent, tcm->tcm_handle,
1121 tca, &err);
1123 if (q == NULL) {
1124 if (err == -EAGAIN)
1125 goto replay;
1126 return err;
1129 graft:
1130 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1131 if (err) {
1132 if (q)
1133 qdisc_destroy(q);
1134 return err;
1137 return 0;
1140 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1141 u32 pid, u32 seq, u16 flags, int event)
1143 struct tcmsg *tcm;
1144 struct nlmsghdr *nlh;
1145 unsigned char *b = skb_tail_pointer(skb);
1146 struct gnet_dump d;
1148 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1149 tcm = NLMSG_DATA(nlh);
1150 tcm->tcm_family = AF_UNSPEC;
1151 tcm->tcm__pad1 = 0;
1152 tcm->tcm__pad2 = 0;
1153 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1154 tcm->tcm_parent = clid;
1155 tcm->tcm_handle = q->handle;
1156 tcm->tcm_info = atomic_read(&q->refcnt);
1157 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1158 if (q->ops->dump && q->ops->dump(q, skb) < 0)
1159 goto nla_put_failure;
1160 q->qstats.qlen = q->q.qlen;
1162 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
1163 goto nla_put_failure;
1165 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1166 qdisc_root_sleeping_lock(q), &d) < 0)
1167 goto nla_put_failure;
1169 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1170 goto nla_put_failure;
1172 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
1173 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1174 gnet_stats_copy_queue(&d, &q->qstats) < 0)
1175 goto nla_put_failure;
1177 if (gnet_stats_finish_copy(&d) < 0)
1178 goto nla_put_failure;
1180 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1181 return skb->len;
1183 nlmsg_failure:
1184 nla_put_failure:
1185 nlmsg_trim(skb, b);
1186 return -1;
1189 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1190 u32 clid, struct Qdisc *old, struct Qdisc *new)
1192 struct sk_buff *skb;
1193 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1195 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1196 if (!skb)
1197 return -ENOBUFS;
1199 if (old && old->handle) {
1200 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1201 goto err_out;
1203 if (new) {
1204 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1205 goto err_out;
1208 if (skb->len)
1209 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1211 err_out:
1212 kfree_skb(skb);
1213 return -EINVAL;
1216 static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1218 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1221 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1222 struct netlink_callback *cb,
1223 int *q_idx_p, int s_q_idx)
1225 int ret = 0, q_idx = *q_idx_p;
1226 struct Qdisc *q;
1228 if (!root)
1229 return 0;
1231 q = root;
1232 if (q_idx < s_q_idx) {
1233 q_idx++;
1234 } else {
1235 if (!tc_qdisc_dump_ignore(q) &&
1236 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1237 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1238 goto done;
1239 q_idx++;
1241 list_for_each_entry(q, &root->list, list) {
1242 if (q_idx < s_q_idx) {
1243 q_idx++;
1244 continue;
1246 if (!tc_qdisc_dump_ignore(q) &&
1247 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1248 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1249 goto done;
1250 q_idx++;
1253 out:
1254 *q_idx_p = q_idx;
1255 return ret;
1256 done:
1257 ret = -1;
1258 goto out;
1261 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1263 struct net *net = sock_net(skb->sk);
1264 int idx, q_idx;
1265 int s_idx, s_q_idx;
1266 struct net_device *dev;
1268 if (net != &init_net)
1269 return 0;
1271 s_idx = cb->args[0];
1272 s_q_idx = q_idx = cb->args[1];
1273 read_lock(&dev_base_lock);
1274 idx = 0;
1275 for_each_netdev(&init_net, dev) {
1276 struct netdev_queue *dev_queue;
1278 if (idx < s_idx)
1279 goto cont;
1280 if (idx > s_idx)
1281 s_q_idx = 0;
1282 q_idx = 0;
1284 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
1285 goto done;
1287 dev_queue = &dev->rx_queue;
1288 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
1289 goto done;
1291 cont:
1292 idx++;
1295 done:
1296 read_unlock(&dev_base_lock);
1298 cb->args[0] = idx;
1299 cb->args[1] = q_idx;
1301 return skb->len;
1306 /************************************************
1307 * Traffic classes manipulation. *
1308 ************************************************/
1312 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1314 struct net *net = sock_net(skb->sk);
1315 struct tcmsg *tcm = NLMSG_DATA(n);
1316 struct nlattr *tca[TCA_MAX + 1];
1317 struct net_device *dev;
1318 struct Qdisc *q = NULL;
1319 const struct Qdisc_class_ops *cops;
1320 unsigned long cl = 0;
1321 unsigned long new_cl;
1322 u32 pid = tcm->tcm_parent;
1323 u32 clid = tcm->tcm_handle;
1324 u32 qid = TC_H_MAJ(clid);
1325 int err;
1327 if (net != &init_net)
1328 return -EINVAL;
1330 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1331 return -ENODEV;
1333 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1334 if (err < 0)
1335 return err;
1338 parent == TC_H_UNSPEC - unspecified parent.
1339 parent == TC_H_ROOT - class is root, which has no parent.
1340 parent == X:0 - parent is root class.
1341 parent == X:Y - parent is a node in hierarchy.
1342 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1344 handle == 0:0 - generate handle from kernel pool.
1345 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1346 handle == X:Y - clear.
1347 handle == X:0 - root class.
1350 /* Step 1. Determine qdisc handle X:0 */
1352 if (pid != TC_H_ROOT) {
1353 u32 qid1 = TC_H_MAJ(pid);
1355 if (qid && qid1) {
1356 /* If both majors are known, they must be identical. */
1357 if (qid != qid1)
1358 return -EINVAL;
1359 } else if (qid1) {
1360 qid = qid1;
1361 } else if (qid == 0)
1362 qid = dev->qdisc->handle;
1364 /* Now qid is genuine qdisc handle consistent
1365 both with parent and child.
1367 TC_H_MAJ(pid) still may be unspecified, complete it now.
1369 if (pid)
1370 pid = TC_H_MAKE(qid, pid);
1371 } else {
1372 if (qid == 0)
1373 qid = dev->qdisc->handle;
1376 /* OK. Locate qdisc */
1377 if ((q = qdisc_lookup(dev, qid)) == NULL)
1378 return -ENOENT;
1380 /* An check that it supports classes */
1381 cops = q->ops->cl_ops;
1382 if (cops == NULL)
1383 return -EINVAL;
1385 /* Now try to get class */
1386 if (clid == 0) {
1387 if (pid == TC_H_ROOT)
1388 clid = qid;
1389 } else
1390 clid = TC_H_MAKE(qid, clid);
1392 if (clid)
1393 cl = cops->get(q, clid);
1395 if (cl == 0) {
1396 err = -ENOENT;
1397 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1398 goto out;
1399 } else {
1400 switch (n->nlmsg_type) {
1401 case RTM_NEWTCLASS:
1402 err = -EEXIST;
1403 if (n->nlmsg_flags&NLM_F_EXCL)
1404 goto out;
1405 break;
1406 case RTM_DELTCLASS:
1407 err = -EOPNOTSUPP;
1408 if (cops->delete)
1409 err = cops->delete(q, cl);
1410 if (err == 0)
1411 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1412 goto out;
1413 case RTM_GETTCLASS:
1414 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
1415 goto out;
1416 default:
1417 err = -EINVAL;
1418 goto out;
1422 new_cl = cl;
1423 err = -EOPNOTSUPP;
1424 if (cops->change)
1425 err = cops->change(q, clid, pid, tca, &new_cl);
1426 if (err == 0)
1427 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
1429 out:
1430 if (cl)
1431 cops->put(q, cl);
1433 return err;
1437 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1438 unsigned long cl,
1439 u32 pid, u32 seq, u16 flags, int event)
1441 struct tcmsg *tcm;
1442 struct nlmsghdr *nlh;
1443 unsigned char *b = skb_tail_pointer(skb);
1444 struct gnet_dump d;
1445 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1447 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1448 tcm = NLMSG_DATA(nlh);
1449 tcm->tcm_family = AF_UNSPEC;
1450 tcm->tcm__pad1 = 0;
1451 tcm->tcm__pad2 = 0;
1452 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1453 tcm->tcm_parent = q->handle;
1454 tcm->tcm_handle = q->handle;
1455 tcm->tcm_info = 0;
1456 NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
1457 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1458 goto nla_put_failure;
1460 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1461 qdisc_root_sleeping_lock(q), &d) < 0)
1462 goto nla_put_failure;
1464 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1465 goto nla_put_failure;
1467 if (gnet_stats_finish_copy(&d) < 0)
1468 goto nla_put_failure;
1470 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1471 return skb->len;
1473 nlmsg_failure:
1474 nla_put_failure:
1475 nlmsg_trim(skb, b);
1476 return -1;
1479 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1480 struct Qdisc *q, unsigned long cl, int event)
1482 struct sk_buff *skb;
1483 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1485 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1486 if (!skb)
1487 return -ENOBUFS;
1489 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1490 kfree_skb(skb);
1491 return -EINVAL;
1494 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1497 struct qdisc_dump_args
1499 struct qdisc_walker w;
1500 struct sk_buff *skb;
1501 struct netlink_callback *cb;
1504 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1506 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1508 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1509 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1512 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1513 struct tcmsg *tcm, struct netlink_callback *cb,
1514 int *t_p, int s_t)
1516 struct qdisc_dump_args arg;
1518 if (tc_qdisc_dump_ignore(q) ||
1519 *t_p < s_t || !q->ops->cl_ops ||
1520 (tcm->tcm_parent &&
1521 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1522 (*t_p)++;
1523 return 0;
1525 if (*t_p > s_t)
1526 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1527 arg.w.fn = qdisc_class_dump;
1528 arg.skb = skb;
1529 arg.cb = cb;
1530 arg.w.stop = 0;
1531 arg.w.skip = cb->args[1];
1532 arg.w.count = 0;
1533 q->ops->cl_ops->walk(q, &arg.w);
1534 cb->args[1] = arg.w.count;
1535 if (arg.w.stop)
1536 return -1;
1537 (*t_p)++;
1538 return 0;
1541 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1542 struct tcmsg *tcm, struct netlink_callback *cb,
1543 int *t_p, int s_t)
1545 struct Qdisc *q;
1547 if (!root)
1548 return 0;
1550 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1551 return -1;
1553 list_for_each_entry(q, &root->list, list) {
1554 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1555 return -1;
1558 return 0;
1561 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1563 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1564 struct net *net = sock_net(skb->sk);
1565 struct netdev_queue *dev_queue;
1566 struct net_device *dev;
1567 int t, s_t;
1569 if (net != &init_net)
1570 return 0;
1572 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1573 return 0;
1574 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1575 return 0;
1577 s_t = cb->args[0];
1578 t = 0;
1580 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1581 goto done;
1583 dev_queue = &dev->rx_queue;
1584 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
1585 goto done;
1587 done:
1588 cb->args[0] = t;
1590 dev_put(dev);
1591 return skb->len;
1594 /* Main classifier routine: scans classifier chain attached
1595 to this qdisc, (optionally) tests for protocol and asks
1596 specific classifiers.
1598 int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1599 struct tcf_result *res)
1601 __be16 protocol = skb->protocol;
1602 int err = 0;
1604 for (; tp; tp = tp->next) {
1605 if ((tp->protocol == protocol ||
1606 tp->protocol == htons(ETH_P_ALL)) &&
1607 (err = tp->classify(skb, tp, res)) >= 0) {
1608 #ifdef CONFIG_NET_CLS_ACT
1609 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1610 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1611 #endif
1612 return err;
1615 return -1;
1617 EXPORT_SYMBOL(tc_classify_compat);
/*
 * Classify @skb against the chain starting at @tp, storing the result in
 * @res.
 *
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY result restarts
 * classification from the head of the chain.  The number of restarts is
 * kept in the verdict field of skb->tc_verd; once it has reached
 * MAX_REC_LOOP the packet is reported as TC_ACT_SHOT.
 *
 * Returns the result of tc_classify_compat(), or TC_ACT_SHOT when the
 * reclassify limit is hit.
 */
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif
	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			/*
			 * This runs once per offending packet, so the
			 * message is rate limited and given an explicit
			 * log level to keep a looping rule set from
			 * flooding the kernel log.
			 */
			if (net_ratelimit())
				printk(KERN_NOTICE
				       "rule prio %u protocol %02x reclassify loop, "
				       "packet dropped\n",
				       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
1650 void tcf_destroy(struct tcf_proto *tp)
1652 tp->ops->destroy(tp);
1653 module_put(tp->ops->owner);
1654 kfree(tp);
1657 void tcf_destroy_chain(struct tcf_proto **fl)
1659 struct tcf_proto *tp;
1661 while ((tp = *fl) != NULL) {
1662 *fl = tp->next;
1663 tcf_destroy(tp);
1666 EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
/*
 * seq_file show routine for the "psched" proc entry.  Prints four
 * 8-digit hex words: NSEC_PER_USEC, PSCHED_TICKS2NS(1), the constant
 * 1000000, and NSEC_PER_SEC divided by the CLOCK_MONOTONIC hrtimer
 * resolution in nanoseconds.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec res;
	u32 res_ns;

	hrtimer_get_res(CLOCK_MONOTONIC, &res);
	res_ns = (u32)ktime_to_ns(timespec_to_ktime(res));

	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC,
		   (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / res_ns);

	return 0;
}

/* open() for the proc entry: a single-record seq_file using psched_show. */
static int psched_open(struct inode *inode, struct file *file)
{
	void *data = PDE(inode)->data;

	return single_open(file, psched_show, data);
}

/* File operations of the "psched" proc entry. */
static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
1696 static int __init pktsched_init(void)
1698 register_qdisc(&pfifo_qdisc_ops);
1699 register_qdisc(&bfifo_qdisc_ops);
1700 register_qdisc(&mq_qdisc_ops);
1701 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
1703 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1704 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1705 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1706 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1707 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1708 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1710 return 0;
1713 subsys_initcall(pktsched_init);