/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
/* Find classifier type by string name */

static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
{
	const struct tcf_proto_ops *t, *res = NULL;

	if (kind) {
		read_lock(&cls_mod_lock);
		list_for_each_entry(t, &tcf_proto_base, head) {
			if (strcmp(kind, t->kind) == 0) {
				if (try_module_get(t->owner))
					res = t;
				break;
			}
		}
		read_unlock(&cls_mod_lock);
	}
	return res;
}
/* Register(unregister) new classifier type */

int register_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -EEXIST;

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head)
		if (!strcmp(ops->kind, t->kind))
			goto out;

	list_add_tail(&ops->head, &tcf_proto_base);
	rc = 0;
out:
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_tcf_proto_ops);
static struct workqueue_struct *tc_filter_wq;
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
	struct tcf_proto_ops *t;
	int rc = -ENOENT;

	/* Wait for outstanding call_rcu()s, if any, from a
	 * tcf_proto_ops's destroy() handler.
	 */
	rcu_barrier();
	flush_workqueue(tc_filter_wq);

	write_lock(&cls_mod_lock);
	list_for_each_entry(t, &tcf_proto_base, head) {
		if (t == ops) {
			list_del(&t->head);
			rc = 0;
			break;
		}
	}
	write_unlock(&cls_mod_lock);
	return rc;
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
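/* Illustrative sketch (not part of this file): the registration pattern a
 * classifier module uses with the two functions above.  "example" and the
 * example_* callbacks are made-up names; a real classifier (compare
 * cls_basic.c) fills in the full set of get/change/delete/walk/dump ops.
 */
#if 0
static struct tcf_proto_ops cls_example_ops __read_mostly = {
	.kind		= "example",
	.classify	= example_classify,
	.init		= example_init,
	.destroy	= example_destroy,
	.owner		= THIS_MODULE,
};

static int __init cls_example_init(void)
{
	/* Fails with -EEXIST if a classifier named "example" already exists. */
	return register_tcf_proto_ops(&cls_example_ops);
}

static void __exit cls_example_exit(void)
{
	unregister_tcf_proto_ops(&cls_example_ops);
}

module_init(cls_example_init);
module_exit(cls_example_exit);
#endif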
bool tcf_queue_work(struct work_struct *work)
{
	return queue_work(tc_filter_wq, work);
}
EXPORT_SYMBOL(tcf_queue_work);
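/* Illustrative sketch (not part of this file): the two-stage teardown that
 * classifiers pair with tcf_queue_work().  A filter is first deferred past
 * an RCU grace period, then punted to the ordered tc workqueue so the final
 * free may sleep.  struct my_filter and both callbacks are made-up names;
 * compare the cls_* implementations for the real thing.
 */
#if 0
static void my_filter_free_work(struct work_struct *work)
{
	struct my_filter *f = container_of(work, struct my_filter, work);

	tcf_exts_destroy(&f->exts);	/* may sleep: runs from the workqueue */
	kfree(f);
}

static void my_filter_free_rcu(struct rcu_head *head)
{
	struct my_filter *f = container_of(head, struct my_filter, rcu);

	INIT_WORK(&f->work, my_filter_free_work);
	tcf_queue_work(&f->work);	/* lands on tc_filter_wq */
}
#endif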
/* Select new prio value from the range, managed by kernel. */

static inline u32 tcf_auto_prio(struct tcf_proto *tp)
{
	u32 first = TC_H_MAKE(0xC0000000U, 0U);

	if (tp)
		first = tp->prio - 1;

	return TC_H_MAJ(first);
}
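/* Worked example (added note): prio travels in the upper 16 bits of the
 * 32-bit value (TC_H_MAJ of tcm_info).  With an empty chain the first
 * auto-allocated prio is TC_H_MAJ(0xC0000000U) == 0xC0000000; if the
 * current head was installed at prio 0x80000000, the next auto prio is
 * TC_H_MAJ(0x80000000U - 1) == 0x7fff0000, so auto-allocated filters are
 * always inserted in front of (at lower prio than) the existing head.
 */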
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, struct tcf_chain *chain,
					  struct netlink_ext_ack *extack)
{
	struct tcf_proto *tp;
	int err;

	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	if (!tp)
		return ERR_PTR(-ENOBUFS);

	err = -ENOENT;
	tp->ops = tcf_proto_lookup_ops(kind);
	if (!tp->ops) {
#ifdef CONFIG_MODULES
		rtnl_unlock();
		request_module("cls_%s", kind);
		rtnl_lock();
		tp->ops = tcf_proto_lookup_ops(kind);
		/* We dropped the RTNL semaphore in order to perform
		 * the module load. So, even if we succeeded in loading
		 * the module we have to replay the request. We indicate
		 * this using -EAGAIN.
		 */
		if (tp->ops) {
			module_put(tp->ops->owner);
			err = -EAGAIN;
		} else {
			NL_SET_ERR_MSG(extack, "TC classifier not found");
			err = -ENOENT;
		}
#endif
		goto errout;
	}
	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->chain = chain;

	err = tp->ops->init(tp);
	if (err) {
		module_put(tp->ops->owner);
		goto errout;
	}
	return tp;

errout:
	kfree(tp);
	return ERR_PTR(err);
}
static void tcf_proto_destroy(struct tcf_proto *tp,
			      struct netlink_ext_ack *extack)
{
	tp->ops->destroy(tp, extack);
	module_put(tp->ops->owner);
	kfree_rcu(tp, rcu);
}
struct tcf_filter_chain_list_item {
	struct list_head list;
	tcf_chain_head_change_t *chain_head_change;
	void *chain_head_change_priv;
};
static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *chain;

	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
	if (!chain)
		return NULL;
	INIT_LIST_HEAD(&chain->filter_chain_list);
	list_add_tail(&chain->list, &block->chain_list);
	chain->block = block;
	chain->index = chain_index;
	chain->refcnt = 1;
	return chain;
}
static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
				       struct tcf_proto *tp_head)
{
	if (item->chain_head_change)
		item->chain_head_change(tp_head, item->chain_head_change_priv);
}
static void tcf_chain_head_change(struct tcf_chain *chain,
				  struct tcf_proto *tp_head)
{
	struct tcf_filter_chain_list_item *item;

	list_for_each_entry(item, &chain->filter_chain_list, list)
		tcf_chain_head_change_item(item, tp_head);
}
static void tcf_chain_flush(struct tcf_chain *chain)
{
	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);

	tcf_chain_head_change(chain, NULL);
	while (tp) {
		RCU_INIT_POINTER(chain->filter_chain, tp->next);
		tcf_proto_destroy(tp, NULL);
		tp = rtnl_dereference(chain->filter_chain);
		tcf_chain_put(chain);
	}
}
static void tcf_chain_destroy(struct tcf_chain *chain)
{
	struct tcf_block *block = chain->block;

	list_del(&chain->list);
	kfree(chain);
	if (list_empty(&block->chain_list))
		kfree(block);
}
static void tcf_chain_hold(struct tcf_chain *chain)
{
	++chain->refcnt;
}
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				bool create)
{
	struct tcf_chain *chain;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (chain->index == chain_index) {
			tcf_chain_hold(chain);
			return chain;
		}
	}

	return create ? tcf_chain_create(block, chain_index) : NULL;
}
EXPORT_SYMBOL(tcf_chain_get);
void tcf_chain_put(struct tcf_chain *chain)
{
	if (--chain->refcnt == 0)
		tcf_chain_destroy(chain);
}
EXPORT_SYMBOL(tcf_chain_put);
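/* Illustrative sketch (not part of this file): the get/put discipline for
 * chains.  Every successful tcf_chain_get() must be balanced by a
 * tcf_chain_put(); the chain (and, via tcf_chain_destroy(), possibly the
 * block) is freed when the last reference drops.  "my_use_chain" is a
 * made-up name and the function assumes it runs under RTNL.
 */
#if 0
static int my_use_chain(struct tcf_block *block, u32 chain_index)
{
	/* create == true: allocate the chain if it does not exist yet */
	struct tcf_chain *chain = tcf_chain_get(block, chain_index, true);

	if (!chain)
		return -ENOMEM;
	/* ... install or inspect filters on the chain ... */
	tcf_chain_put(chain);
	return 0;
}
#endif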
static bool tcf_block_offload_in_use(struct tcf_block *block)
{
	return block->offloadcnt;
}
static int tcf_block_offload_cmd(struct tcf_block *block,
				 struct net_device *dev,
				 struct tcf_block_ext_info *ei,
				 enum tc_block_command command)
{
	struct tc_block_offload bo = {};

	bo.command = command;
	bo.binder_type = ei->binder_type;
	bo.block = block;
	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
}
static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
				  struct tcf_block_ext_info *ei)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	if (!dev->netdev_ops->ndo_setup_tc)
		goto no_offload_dev_inc;

	/* If tc offload feature is disabled and the block we try to bind
	 * to already has some offloaded filters, forbid to bind.
	 */
	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
		return -EOPNOTSUPP;

	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_inc;
	return err;

no_offload_dev_inc:
	if (tcf_block_offload_in_use(block))
		return -EOPNOTSUPP;
	block->nooffloaddevcnt++;
	return 0;
}
static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
				     struct tcf_block_ext_info *ei)
{
	struct net_device *dev = q->dev_queue->dev;
	int err;

	if (!dev->netdev_ops->ndo_setup_tc)
		goto no_offload_dev_dec;
	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
	if (err == -EOPNOTSUPP)
		goto no_offload_dev_dec;
	return;

no_offload_dev_dec:
	WARN_ON(block->nooffloaddevcnt-- == 0);
}
static int
tcf_chain_head_change_cb_add(struct tcf_chain *chain,
			     struct tcf_block_ext_info *ei,
			     struct netlink_ext_ack *extack)
{
	struct tcf_filter_chain_list_item *item;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item) {
		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
		return -ENOMEM;
	}
	item->chain_head_change = ei->chain_head_change;
	item->chain_head_change_priv = ei->chain_head_change_priv;
	if (chain->filter_chain)
		tcf_chain_head_change_item(item, chain->filter_chain);
	list_add(&item->list, &chain->filter_chain_list);
	return 0;
}
static void
tcf_chain_head_change_cb_del(struct tcf_chain *chain,
			     struct tcf_block_ext_info *ei)
{
	struct tcf_filter_chain_list_item *item;

	list_for_each_entry(item, &chain->filter_chain_list, list) {
		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
		    (item->chain_head_change == ei->chain_head_change &&
		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
			tcf_chain_head_change_item(item, NULL);
			list_del(&item->list);
			kfree(item);
			return;
		}
	}
	WARN_ON(1);
}
struct tcf_net {
	struct idr idr;
};

static unsigned int tcf_net_id;
static int tcf_block_insert(struct tcf_block *block, struct net *net,
			    struct netlink_ext_ack *extack)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	return idr_alloc_u32(&tn->idr, block, &block->index, block->index,
			     GFP_KERNEL);
}
static void tcf_block_remove(struct tcf_block *block, struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	idr_remove(&tn->idr, block->index);
}
static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
					  u32 block_index,
					  struct netlink_ext_ack *extack)
{
	struct tcf_block *block;
	struct tcf_chain *chain;
	int err;

	block = kzalloc(sizeof(*block), GFP_KERNEL);
	if (!block) {
		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
		return ERR_PTR(-ENOMEM);
	}
	INIT_LIST_HEAD(&block->chain_list);
	INIT_LIST_HEAD(&block->cb_list);
	INIT_LIST_HEAD(&block->owner_list);

	/* Create chain 0 by default, it has to be always present. */
	chain = tcf_chain_create(block, 0);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Failed to create new tcf chain");
		err = -ENOMEM;
		goto err_chain_create;
	}
	block->refcnt = 1;
	block->net = net;
	block->index = block_index;

	/* Don't store q pointer for blocks which are shared */
	if (!tcf_block_shared(block))
		block->q = q;
	return block;

err_chain_create:
	kfree(block);
	return ERR_PTR(err);
}
static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	return idr_find(&tn->idr, block_index);
}
static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block)
{
	return list_first_entry(&block->chain_list, struct tcf_chain, list);
}
struct tcf_block_owner_item {
	struct list_head list;
	struct Qdisc *q;
	enum tcf_block_binder_type binder_type;
};
static void
tcf_block_owner_netif_keep_dst(struct tcf_block *block,
			       struct Qdisc *q,
			       enum tcf_block_binder_type binder_type)
{
	if (block->keep_dst &&
	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
	    binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
		netif_keep_dst(qdisc_dev(q));
}
void tcf_block_netif_keep_dst(struct tcf_block *block)
{
	struct tcf_block_owner_item *item;

	block->keep_dst = true;
	list_for_each_entry(item, &block->owner_list, list)
		tcf_block_owner_netif_keep_dst(block, item->q,
					       item->binder_type);
}
EXPORT_SYMBOL(tcf_block_netif_keep_dst);
static int tcf_block_owner_add(struct tcf_block *block,
			       struct Qdisc *q,
			       enum tcf_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return -ENOMEM;
	item->q = q;
	item->binder_type = binder_type;
	list_add(&item->list, &block->owner_list);
	return 0;
}
static void tcf_block_owner_del(struct tcf_block *block,
				struct Qdisc *q,
				enum tcf_block_binder_type binder_type)
{
	struct tcf_block_owner_item *item;

	list_for_each_entry(item, &block->owner_list, list) {
		if (item->q == q && item->binder_type == binder_type) {
			list_del(&item->list);
			kfree(item);
			return;
		}
	}
	WARN_ON(1);
}
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
		      struct tcf_block_ext_info *ei,
		      struct netlink_ext_ack *extack)
{
	struct net *net = qdisc_net(q);
	struct tcf_block *block = NULL;
	bool created = false;
	int err;

	if (ei->block_index) {
		/* block_index not 0 means the shared block is requested */
		block = tcf_block_lookup(net, ei->block_index);
		if (block)
			block->refcnt++;
	}

	if (!block) {
		block = tcf_block_create(net, q, ei->block_index, extack);
		if (IS_ERR(block))
			return PTR_ERR(block);
		created = true;
		if (tcf_block_shared(block)) {
			err = tcf_block_insert(block, net, extack);
			if (err)
				goto err_block_insert;
		}
	}

	err = tcf_block_owner_add(block, q, ei->binder_type);
	if (err)
		goto err_block_owner_add;

	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);

	err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block),
					   ei, extack);
	if (err)
		goto err_chain_head_change_cb_add;

	err = tcf_block_offload_bind(block, q, ei);
	if (err)
		goto err_block_offload_bind;

	*p_block = block;
	return 0;

err_block_offload_bind:
	tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
err_chain_head_change_cb_add:
	tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
	if (created) {
		if (tcf_block_shared(block))
			tcf_block_remove(block, net);
err_block_insert:
		kfree(tcf_block_chain_zero(block));
		kfree(block);
	} else {
		block->refcnt--;
	}
	return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
{
	struct tcf_proto __rcu **p_filter_chain = priv;

	rcu_assign_pointer(*p_filter_chain, tp_head);
}
int tcf_block_get(struct tcf_block **p_block,
		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
		  struct netlink_ext_ack *extack)
{
	struct tcf_block_ext_info ei = {
		.chain_head_change = tcf_chain_head_change_dflt,
		.chain_head_change_priv = p_filter_chain,
	};

	WARN_ON(!p_filter_chain);
	return tcf_block_get_ext(p_block, q, &ei, extack);
}
EXPORT_SYMBOL(tcf_block_get);
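/* Illustrative sketch (not part of this file): how a qdisc attaches its
 * filter list to a block from ->init() using tcf_block_get(), in the style
 * of fq_codel.  struct my_sched_data and the two callbacks are made-up
 * names; the default chain_head_change callback installed above keeps
 * q->filter_list pointing at the head of chain 0.
 */
#if 0
struct my_sched_data {
	struct tcf_proto __rcu *filter_list;
	struct tcf_block *block;
};

static int my_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct my_sched_data *q = qdisc_priv(sch);

	return tcf_block_get(&q->block, &q->filter_list, sch, extack);
}

static void my_qdisc_destroy(struct Qdisc *sch)
{
	struct my_sched_data *q = qdisc_priv(sch);

	tcf_block_put(q->block);
}
#endif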
/* XXX: Standalone actions are not allowed to jump to any chain, and bound
 * actions should be all removed after flushing.
 */
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
		       struct tcf_block_ext_info *ei)
{
	struct tcf_chain *chain, *tmp;

	if (!block)
		return;
	tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
	tcf_block_owner_del(block, q, ei->binder_type);

	if (--block->refcnt == 0) {
		if (tcf_block_shared(block))
			tcf_block_remove(block, block->net);

		/* Hold a refcnt for all chains, so that they don't disappear
		 * while we are iterating.
		 */
		list_for_each_entry(chain, &block->chain_list, list)
			tcf_chain_hold(chain);

		list_for_each_entry(chain, &block->chain_list, list)
			tcf_chain_flush(chain);
	}

	tcf_block_offload_unbind(block, q, ei);

	if (block->refcnt == 0) {
		/* At this point, all the chains should have refcnt >= 1. */
		list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
			tcf_chain_put(chain);

		/* Finally, put chain 0 and allow block to be freed. */
		tcf_chain_put(tcf_block_chain_zero(block));
	}
}
EXPORT_SYMBOL(tcf_block_put_ext);
void tcf_block_put(struct tcf_block *block)
{
	struct tcf_block_ext_info ei = {0, };

	if (!block)
		return;
	tcf_block_put_ext(block, block->q, &ei);
}
EXPORT_SYMBOL(tcf_block_put);
struct tcf_block_cb {
	struct list_head list;
	tc_setup_cb_t *cb;
	void *cb_ident;
	void *cb_priv;
	unsigned int refcnt;
};

void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
{
	return block_cb->cb_priv;
}
EXPORT_SYMBOL(tcf_block_cb_priv);
struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
					 tc_setup_cb_t *cb, void *cb_ident)
{	struct tcf_block_cb *block_cb;

	list_for_each_entry(block_cb, &block->cb_list, list)
		if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
			return block_cb;
	return NULL;
}
EXPORT_SYMBOL(tcf_block_cb_lookup);
void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
{
	block_cb->refcnt++;
}
EXPORT_SYMBOL(tcf_block_cb_incref);
unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
{
	return --block_cb->refcnt;
}
EXPORT_SYMBOL(tcf_block_cb_decref);
struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
					     tc_setup_cb_t *cb, void *cb_ident,
					     void *cb_priv)
{
	struct tcf_block_cb *block_cb;

	/* At this point, playback of previous block cb calls is not supported,
	 * so forbid to register to block which already has some offloaded
	 * filters present.
	 */
	if (tcf_block_offload_in_use(block))
		return ERR_PTR(-EOPNOTSUPP);

	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
	if (!block_cb)
		return ERR_PTR(-ENOMEM);
	block_cb->cb = cb;
	block_cb->cb_ident = cb_ident;
	block_cb->cb_priv = cb_priv;
	list_add(&block_cb->list, &block->cb_list);
	return block_cb;
}
EXPORT_SYMBOL(__tcf_block_cb_register);
int tcf_block_cb_register(struct tcf_block *block,
			  tc_setup_cb_t *cb, void *cb_ident,
			  void *cb_priv)
{
	struct tcf_block_cb *block_cb;

	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
	return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
}
EXPORT_SYMBOL(tcf_block_cb_register);
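/* Illustrative sketch (not part of this file): how a driver binds to a
 * block from its ndo_setup_tc() TC_SETUP_BLOCK handler, in the style of
 * mlxsw or nfp.  "my_setup_tc_block", "my_setup_tc_cb" and "priv" are
 * made-up names; passing the driver priv as both cb_ident and cb_priv is
 * the common convention.
 */
#if 0
static int my_setup_tc_block(struct my_priv *priv, struct tc_block_offload *f)
{
	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
		return -EOPNOTSUPP;

	switch (f->command) {
	case TC_BLOCK_BIND:
		return tcf_block_cb_register(f->block, my_setup_tc_cb,
					     priv, priv);
	case TC_BLOCK_UNBIND:
		tcf_block_cb_unregister(f->block, my_setup_tc_cb, priv);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
#endif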
void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
{
	list_del(&block_cb->list);
	kfree(block_cb);
}
EXPORT_SYMBOL(__tcf_block_cb_unregister);
void tcf_block_cb_unregister(struct tcf_block *block,
			     tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *block_cb;

	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
	if (!block_cb)
		return;
	__tcf_block_cb_unregister(block_cb);
}
EXPORT_SYMBOL(tcf_block_cb_unregister);
static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
			     void *type_data, bool err_stop)
{
	struct tcf_block_cb *block_cb;
	int ok_count = 0;
	int err;

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (block->nooffloaddevcnt && err_stop)
		return -EOPNOTSUPP;

	list_for_each_entry(block_cb, &block->cb_list, list) {
		err = block_cb->cb(type, type_data, block_cb->cb_priv);
		if (err) {
			if (err_stop)
				return err;
		} else {
			ok_count++;
		}
	}
	return ok_count;
}
/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->chain->block->index,
				       tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
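/* Illustrative sketch (not part of this file): the usual tcf_classify()
 * call site in a qdisc's classification helper (compare fq_codel_classify()).
 * "my_classify" and struct my_sched_data are made-up names; the TC_ACT_*
 * handling is the conventional pattern under CONFIG_NET_CLS_ACT.
 */
#if 0
static unsigned int my_classify(struct sk_buff *skb, struct Qdisc *sch)
{
	struct my_sched_data *q = qdisc_priv(sch);
	struct tcf_proto *filter;
	struct tcf_result res;
	int result;

	filter = rcu_dereference_bh(q->filter_list);
	if (!filter)
		return 0;	/* no filters attached: default class */

	result = tcf_classify(skb, filter, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
		case TC_ACT_SHOT:
			return 0;	/* dropped or consumed by an action */
		}
#endif
		return TC_H_MIN(res.classid);
	}
	return 0;	/* TC_ACT_UNSPEC: no match, fall back to default */
}
#endif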
struct tcf_chain_info {
	struct tcf_proto __rcu **pprev;
	struct tcf_proto __rcu *next;
};
static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
{
	return rtnl_dereference(*chain_info->pprev);
}
static void tcf_chain_tp_insert(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	if (*chain_info->pprev == chain->filter_chain)
		tcf_chain_head_change(chain, tp);
	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
	rcu_assign_pointer(*chain_info->pprev, tp);
	tcf_chain_hold(chain);
}
static void tcf_chain_tp_remove(struct tcf_chain *chain,
				struct tcf_chain_info *chain_info,
				struct tcf_proto *tp)
{
	struct tcf_proto *next = rtnl_dereference(chain_info->next);

	if (tp == chain->filter_chain)
		tcf_chain_head_change(chain, next);
	RCU_INIT_POINTER(*chain_info->pprev, next);
	tcf_chain_put(chain);
}
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **pprev;
	struct tcf_proto *tp;

	/* Check the chain for existence of proto-tcf with this priority */
	for (pprev = &chain->filter_chain;
	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
		if (tp->prio >= prio) {
			if (tp->prio == prio) {
				if (prio_allocate ||
				    (tp->protocol != protocol && protocol))
					return ERR_PTR(-EINVAL);
			} else {
				tp = NULL;
			}
			break;
		}
	}
	chain_info->pprev = pprev;
	chain_info->next = tp ? tp->next : NULL;
	return tp;
}
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct tcf_block *block,
			 struct Qdisc *q, u32 parent, void *fh,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	if (q) {
		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
		tcm->tcm_parent = parent;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}
	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
		goto nla_put_failure;
	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto nla_put_failure;
	if (!fh) {
		tcm->tcm_handle = 0;
	} else {
		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
			goto nla_put_failure;
	}
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  struct tcf_block *block, struct Qdisc *q,
			  u32 parent, void *fh, int event, bool unicast)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
			      struct nlmsghdr *n, struct tcf_proto *tp,
			      struct tcf_block *block, struct Qdisc *q,
			      u32 parent, void *fh, bool unicast, bool *last,
			      struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
		kfree_skb(skb);
		return -EINVAL;
	}

	err = tp->ops->delete(tp, fh, last, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	if (unicast)
		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");

	return err;
}
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct tcf_block *block, struct Qdisc *q,
				 u32 parent, struct nlmsghdr *n,
				 struct tcf_chain *chain, int event)
{
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next))
		tfilter_notify(net, oskb, n, tp, block,
			       q, parent, 0, event, false);
}
/* Add/change/delete/get a filter node */

static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct tcmsg *t;
	u32 protocol;
	u32 prio;
	bool prio_allocate;
	u32 parent;
	u32 chain_index;
	struct Qdisc *q = NULL;
	struct tcf_chain_info chain_info;
	struct tcf_chain *chain = NULL;
	struct tcf_block *block;
	struct tcf_proto *tp;
	unsigned long cl;
	void *fh;
	int err;
	int tp_created;

	if ((n->nlmsg_type != RTM_GETTFILTER) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	tp_created = 0;

	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	t = nlmsg_data(n);
	protocol = TC_H_MIN(t->tcm_info);
	prio = TC_H_MAJ(t->tcm_info);
	prio_allocate = false;
	parent = t->tcm_parent;
	cl = 0;

	if (prio == 0) {
		switch (n->nlmsg_type) {
		case RTM_DELTFILTER:
			if (protocol || t->tcm_handle || tca[TCA_KIND]) {
				NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
				return -ENOENT;
			}
			break;
		case RTM_NEWTFILTER:
			/* If no priority is provided by the user,
			 * we allocate one.
			 */
			if (n->nlmsg_flags & NLM_F_CREATE) {
				prio = TC_H_MAKE(0x80000000U, 0U);
				prio_allocate = true;
				break;
			}
			/* fall-through */
		default:
			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
			return -ENOENT;
		}
	}

	/* Find head of filter chain. */

	if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_lookup(net, t->tcm_block_index);
		if (!block) {
			NL_SET_ERR_MSG(extack, "Block of given index was not found");
			err = -EINVAL;
			goto errout;
		}
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;

		/* Find link */
		dev = __dev_get_by_index(net, t->tcm_ifindex);
		if (!dev)
			return -ENODEV;

		/* Find qdisc */
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
			if (!q) {
				NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
				return -EINVAL;
			}
		}

		/* Is it classful? */
		cops = q->ops->cl_ops;
		if (!cops) {
			NL_SET_ERR_MSG(extack, "Qdisc not classful");
			return -EINVAL;
		}

		if (!cops->tcf_block) {
			NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
			return -EOPNOTSUPP;
		}

		/* Do we search for filter, attached to class? */
		if (TC_H_MIN(parent)) {
			cl = cops->find(q, parent);
			if (cl == 0) {
				NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
				return -ENOENT;
			}
		}

		/* And the last stroke */
		block = cops->tcf_block(q, cl, extack);
		if (!block) {
			err = -EINVAL;
			goto errout;
		}
		if (tcf_block_shared(block)) {
			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
			err = -EOPNOTSUPP;
			goto errout;
		}
	}

	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
	if (chain_index > TC_ACT_EXT_VAL_MASK) {
		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
		err = -EINVAL;
		goto errout;
	}
	chain = tcf_chain_get(block, chain_index,
			      n->nlmsg_type == RTM_NEWTFILTER);
	if (!chain) {
		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
		err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
		goto errout;
	}

	if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
		tfilter_notify_chain(net, skb, block, q, parent, n,
				     chain, RTM_DELTFILTER);
		tcf_chain_flush(chain);
		err = 0;
		goto errout;
	}

	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
			       prio, prio_allocate);
	if (IS_ERR(tp)) {
		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
		err = PTR_ERR(tp);
		goto errout;
	}

	if (tp == NULL) {
		/* Proto-tcf does not exist, create new one */

		if (tca[TCA_KIND] == NULL || !protocol) {
			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
			err = -EINVAL;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout;
		}

		if (prio_allocate)
			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));

		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
				      protocol, prio, chain, extack);
		if (IS_ERR(tp)) {
			err = PTR_ERR(tp);
			goto errout;
		}
		tp_created = 1;
	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
		err = -EINVAL;
		goto errout;
	}

	fh = tp->ops->get(tp, t->tcm_handle);

	if (!fh) {
		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
			tcf_chain_tp_remove(chain, &chain_info, tp);
			tfilter_notify(net, skb, n, tp, block, q, parent, fh,
				       RTM_DELTFILTER, false);
			tcf_proto_destroy(tp, extack);
			err = 0;
			goto errout;
		}

		if (n->nlmsg_type != RTM_NEWTFILTER ||
		    !(n->nlmsg_flags & NLM_F_CREATE)) {
			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
			err = -ENOENT;
			goto errout;
		}
	} else {
		bool last;

		switch (n->nlmsg_type) {
		case RTM_NEWTFILTER:
			if (n->nlmsg_flags & NLM_F_EXCL) {
				if (tp_created)
					tcf_proto_destroy(tp, NULL);
				NL_SET_ERR_MSG(extack, "Filter already exists");
				err = -EEXIST;
				goto errout;
			}
			break;
		case RTM_DELTFILTER:
			err = tfilter_del_notify(net, skb, n, tp, block,
						 q, parent, fh, false, &last,
						 extack);
			if (err)
				goto errout;
			if (last) {
				tcf_chain_tp_remove(chain, &chain_info, tp);
				tcf_proto_destroy(tp, extack);
			}
			goto errout;
		case RTM_GETTFILTER:
			err = tfilter_notify(net, skb, n, tp, block, q, parent,
					     fh, RTM_NEWTFILTER, true);
			if (err < 0)
				NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
			goto errout;
		default:
			NL_SET_ERR_MSG(extack, "Invalid netlink message type");
			err = -EINVAL;
			goto errout;
		}
	}

	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
			      extack);
	if (err == 0) {
		if (tp_created)
			tcf_chain_tp_insert(chain, &chain_info, tp);
		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
			       RTM_NEWTFILTER, false);
	} else {
		if (tp_created)
			tcf_proto_destroy(tp, NULL);
	}

errout:
	if (chain)
		tcf_chain_put(chain);
	if (err == -EAGAIN)
		/* Replay the request. */
		goto replay;
	return err;
}
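/* Usage note (added): this handler is what serves e.g.
 * "tc filter add dev eth0 ingress prio 1 protocol ip flower ...", which
 * arrives as RTM_NEWTFILTER with NLM_F_CREATE and TCA_KIND == "flower",
 * while an RTM_DELTFILTER with prio 0 and no kind/handle flushes the whole
 * chain, as handled above.
 */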
struct tcf_dump_args {
	struct tcf_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	struct tcf_block *block;
	struct Qdisc *q;
	u32 parent;
};
static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_dump_args *a = (void *)arg;
	struct net *net = sock_net(a->skb->sk);

	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
			     n, NETLINK_CB(a->cb->skb).portid,
			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			     RTM_NEWTFILTER);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct net *net = sock_net(skb->sk);
	struct tcf_block *block = chain->block;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		if (*p_index < index_start)
			continue;
		if (TC_H_MAJ(tcm->tcm_info) &&
		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
			continue;
		if (TC_H_MIN(tcm->tcm_info) &&
		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
			continue;
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (cb->args[1] == 0) {
			if (tcf_fill_node(net, skb, tp, block, q, parent, 0,
					  NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
					  RTM_NEWTFILTER) <= 0)
				return false;

			cb->args[1] = 1;
		}
		if (!tp->ops->walk)
			continue;
		arg.w.fn = tcf_node_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.count = 0;
		tp->ops->walk(tp, &arg.w);
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			return false;
	}
	return true;
}
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct Qdisc *q = NULL;
	struct tcf_block *block;
	struct tcf_chain *chain;
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	long index_start;
	long index;
	u32 parent;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return skb->len;

	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
	if (err)
		return err;

	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		block = tcf_block_lookup(net, tcm->tcm_block_index);
		if (!block)
			goto out;
		/* If we work with block index, q is NULL and parent value
		 * will never be used in the following code. The check
		 * in tcf_fill_node prevents it. However, compiler does not
		 * see that far, so set parent to zero to silence the warning
		 * about parent being uninitialized.
		 */
		parent = 0;
	} else {
		const struct Qdisc_class_ops *cops;
		struct net_device *dev;
		unsigned long cl = 0;

		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
		if (!dev)
			return skb->len;

		parent = tcm->tcm_parent;
		if (!parent) {
			q = dev->qdisc;
			parent = q->handle;
		} else {
			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
		}
		if (!q)
			goto out;
		cops = q->ops->cl_ops;
		if (!cops)
			goto out;
		if (!cops->tcf_block)
			goto out;
		if (TC_H_MIN(tcm->tcm_parent)) {
			cl = cops->find(q, tcm->tcm_parent);
			if (cl == 0)
				goto out;
		}
		block = cops->tcf_block(q, cl, NULL);
		if (!block)
			goto out;
		if (tcf_block_shared(block))
			q = NULL;
	}

	index_start = cb->args[0];
	index = 0;

	list_for_each_entry(chain, &block->chain_list, list) {
		if (tca[TCA_CHAIN] &&
		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
			continue;
		if (!tcf_chain_dump(chain, q, parent, skb, cb,
				    index_start, &index)) {
			err = -EMSGSIZE;
			break;
		}
	}

	cb->args[0] = index;

out:
	/* If we did no progress, the error (EMSGSIZE) is real */
	if (skb->len == 0 && err)
		return err;
	return skb->len;
}
*exts
)
1417 #ifdef CONFIG_NET_CLS_ACT
1421 tcf_exts_to_list(exts
, &actions
);
1422 tcf_action_destroy(&actions
, TCA_ACT_UNBIND
);
1423 kfree(exts
->actions
);
1424 exts
->nr_actions
= 0;
1427 EXPORT_SYMBOL(tcf_exts_destroy
);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
		      struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
	{
		struct tc_action *act;

		if (exts->police && tb[exts->police]) {
			act = tcf_action_init_1(net, tp, tb[exts->police],
						rate_tlv, "police", ovr,
						TCA_ACT_BIND);
			if (IS_ERR(act))
				return PTR_ERR(act);

			act->type = exts->type = TCA_OLD_COMPAT;
			exts->actions[0] = act;
			exts->nr_actions = 1;
		} else if (exts->action && tb[exts->action]) {
			LIST_HEAD(actions);
			int err, i = 0;

			err = tcf_action_init(net, tp, tb[exts->action],
					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
					      &actions);
			if (err)
				return err;
			list_for_each_entry(act, &actions, list)
				exts->actions[i++] = act;
			exts->nr_actions = i;
		}
	}
#else
	if ((exts->action && tb[exts->action]) ||
	    (exts->police && tb[exts->police])) {
		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
		return -EOPNOTSUPP;
	}
#endif

	return 0;
}
EXPORT_SYMBOL(tcf_exts_validate);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts old = *dst;

	*dst = *src;
	tcf_exts_destroy(&old);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
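/* Illustrative sketch (not part of this file): the validate/change pairing
 * a classifier uses from its ->change() callback (compare cls_basic).  The
 * TCA_EXAMPLE_* attribute ids and struct my_filter are made-up names.
 */
#if 0
static int my_set_parms(struct net *net, struct tcf_proto *tp,
			struct my_filter *f, struct nlattr **tb,
			struct nlattr *est, bool ovr,
			struct netlink_ext_ack *extack)
{
	struct tcf_exts e;
	int err;

	err = tcf_exts_init(&e, TCA_EXAMPLE_ACT, TCA_EXAMPLE_POLICE);
	if (err < 0)
		return err;
	err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
	if (err < 0) {
		tcf_exts_destroy(&e);
		return err;
	}
	/* Commit: swap the validated actions in, destroy the old ones. */
	tcf_exts_change(&f->exts, &e);
	return 0;
}
#endif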
#ifdef CONFIG_NET_CLS_ACT
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	if (exts->nr_actions == 0)
		return NULL;
	else
		return exts->actions[0];
}
#endif
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (exts->action && tcf_exts_has_actions(exts)) {
		/*
		 * again for backward compatible mode - we want
		 * to work with both old and new modes of entering
		 * tc data even if iproute2 was newer - jhs
		 */
		if (exts->type != TCA_OLD_COMPAT) {
			LIST_HEAD(actions);

			nest = nla_nest_start(skb, exts->action);
			if (nest == NULL)
				goto nla_put_failure;

			tcf_exts_to_list(exts, &actions);
			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		} else if (exts->police) {
			struct tc_action *act = tcf_exts_first_act(exts);
			nest = nla_nest_start(skb, exts->police);
			if (nest == NULL || !act)
				goto nla_put_failure;
			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
				goto nla_put_failure;
			nla_nest_end(skb, nest);
		}
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *a = tcf_exts_first_act(exts);
	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
				       enum tc_setup_type type,
				       void *type_data, bool err_stop)
{
	int ok_count = 0;
#ifdef CONFIG_NET_CLS_ACT
	const struct tc_action *a;
	struct net_device *dev;
	int i, ret;

	if (!tcf_exts_has_actions(exts))
		return 0;

	for (i = 0; i < exts->nr_actions; i++) {
		a = exts->actions[i];
		if (!a->ops->get_dev)
			continue;
		dev = a->ops->get_dev(a);
		if (!dev)
			continue;
		ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
		if (ret < 0)
			return ret;
		ok_count += ret;
	}
#endif
	return ok_count;
}
int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
		     enum tc_setup_type type, void *type_data, bool err_stop)
{
	int ok_count;
	int ret;

	ret = tcf_block_cb_call(block, type, type_data, err_stop);
	if (ret < 0)
		return ret;
	ok_count = ret;

	if (!exts)
		return ok_count;
	ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
	if (ret < 0)
		return ret;
	ok_count += ret;

	return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);
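/* Illustrative sketch (not part of this file): the return-value convention
 * of tc_setup_cb_call() as classifiers consume it (compare cls_matchall).
 * A negative value is a hard driver error, otherwise it is the number of
 * callbacks that accepted the offload; with skip_sw the filter must be
 * offloaded somewhere.  "my_offload_filter" is a made-up name.
 */
#if 0
static int my_offload_filter(struct tcf_block *block, struct tcf_exts *exts,
			     void *type_data, bool skip_sw)
{
	int ok_count;

	/* err_stop == skip_sw: without a software fallback, any driver
	 * failure must abort the whole operation.
	 */
	ok_count = tc_setup_cb_call(block, exts, TC_SETUP_CLSMATCHALL,
				    type_data, skip_sw);
	if (ok_count < 0)
		return ok_count;	/* a driver returned an error */
	if (ok_count == 0 && skip_sw)
		return -EINVAL;		/* nothing offloaded the filter */
	return 0;
}
#endif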
static __net_init int tcf_net_init(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	idr_init(&tn->idr);
	return 0;
}
static void __net_exit tcf_net_exit(struct net *net)
{
	struct tcf_net *tn = net_generic(net, tcf_net_id);

	idr_destroy(&tn->idr);
}
static struct pernet_operations tcf_net_ops = {
	.init = tcf_net_init,
	.exit = tcf_net_exit,
	.id   = &tcf_net_id,
	.size = sizeof(struct tcf_net),
};
static int __init tc_filter_init(void)
{
	int err;

	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	err = register_pernet_subsys(&tcf_net_ops);
	if (err)
		goto err_register_pernet_subsys;

	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
		      tc_dump_tfilter, 0);

	return 0;

err_register_pernet_subsys:
	destroy_workqueue(tc_filter_wq);
	return err;
}

subsys_initcall(tc_filter_init);