// SPDX-License-Identifier: GPL-2.0-only
/*
 * Berkeley Packet Filter based traffic classifier
 *
 * Might be used to classify traffic through flexible, user-defined and
 * possibly JIT-ed BPF filters for traffic control as an alternative to
 * [remainder of this sentence is missing from this copy of the file]
 *
 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
 */
12 #include <linux/module.h>
13 #include <linux/types.h>
14 #include <linux/skbuff.h>
15 #include <linux/filter.h>
16 #include <linux/bpf.h>
17 #include <linux/idr.h>
19 #include <net/rtnetlink.h>
20 #include <net/pkt_cls.h>
22 #include <net/tc_wrapper.h>
24 MODULE_LICENSE("GPL");
25 MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
26 MODULE_DESCRIPTION("TC BPF based classifier");
/* Length limit applied to the TCA_BPF_NAME attribute in bpf_policy. */
#define CLS_BPF_NAME_LEN	256

/* Generic classifier flags this classifier accepts from user space. */
#define CLS_BPF_SUPPORTED_GEN_FLAGS		\
	(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
/*
 * Members accessed through struct cls_bpf_head pointers elsewhere in this
 * file (head->plist, head->handle_idr).
 * NOTE(review): the enclosing struct declaration is not visible in this
 * extract, and further members may be missing.
 */
/* List of attached programs; entries are linked through their 'link' member. */
33 struct list_head plist
;
/* IDR keyed by program handle (see idr_alloc_u32()/idr_remove() callers). */
34 struct idr handle_idr
;
/*
 * Members accessed through struct cls_bpf_prog pointers elsewhere in this
 * file.
 * NOTE(review): the enclosing struct declaration is not visible in this
 * extract; other members used by the code (e.g. exts, gen_flags, handle,
 * bpf_name, bpf_num_ops, exts_integrated) are not shown here.
 */
/* Program executed with bpf_prog_run() in cls_bpf_classify(). */
39 struct bpf_prog
*filter
;
/* Linkage into the head's plist. */
40 struct list_head link
;
/* Classification result; res.classid seeds the skb's tc_classid. */
41 struct tcf_result res
;
/* Passed by address to the tc_setup_cb_*() offload helpers. */
44 unsigned int in_hw_count
;
/* Copy of the classic BPF instructions; NULL marks an eBPF program. */
48 struct sock_filter
*bpf_ops
;
/* Work item handed to tcf_queue_work() for deferred deletion. */
51 struct rcu_work rwork
;
54 static const struct nla_policy bpf_policy
[TCA_BPF_MAX
+ 1] = {
55 [TCA_BPF_CLASSID
] = { .type
= NLA_U32
},
56 [TCA_BPF_FLAGS
] = { .type
= NLA_U32
},
57 [TCA_BPF_FLAGS_GEN
] = { .type
= NLA_U32
},
58 [TCA_BPF_FD
] = { .type
= NLA_U32
},
59 [TCA_BPF_NAME
] = { .type
= NLA_NUL_STRING
,
60 .len
= CLS_BPF_NAME_LEN
},
61 [TCA_BPF_OPS_LEN
] = { .type
= NLA_U16
},
62 [TCA_BPF_OPS
] = { .type
= NLA_BINARY
,
63 .len
= sizeof(struct sock_filter
) * BPF_MAXINSNS
},
/*
 * Called from cls_bpf_classify() with the value returned by the filter
 * program when the program runs with integrated actions.
 * NOTE(review): the function body is not visible in this extract.
 */
66 static int cls_bpf_exec_opcode(int code
)
/*
 * Classification entry point (installed as .classify in cls_bpf_ops).
 *
 * Walks the programs on head->plist under RCU. For each one the skb's
 * tc_classid is seeded from prog->res.classid, then the filter is either
 * skipped (tc_skip_sw()) or executed with bpf_prog_run().
 *
 * NOTE(review): several lines of this function (local declarations, an
 * else arm, continue/break/return statements, closing braces) are not
 * visible in this extract; comments below describe only what is shown.
 */
81 TC_INDIRECT_SCOPE
int cls_bpf_classify(struct sk_buff
*skb
,
82 const struct tcf_proto
*tp
,
83 struct tcf_result
*res
)
85 struct cls_bpf_head
*head
= rcu_dereference_bh(tp
->root
);
86 bool at_ingress
= skb_at_tc_ingress(skb
);
87 struct cls_bpf_prog
*prog
;
90 list_for_each_entry_rcu(prog
, &head
->plist
, link
) {
93 qdisc_skb_cb(skb
)->tc_classid
= prog
->res
.classid
;
/* Software path disabled for this program: use a fixed result instead of
 * running the filter.
 */
95 if (tc_skip_sw(prog
->gen_flags
)) {
96 filter_res
= prog
->exts_integrated
? TC_ACT_UNSPEC
: 0;
/* At ingress, expose mac_len bytes in front of skb data for the duration of
 * the program run and hide them again afterwards.
 */
97 } else if (at_ingress
) {
98 /* It is safe to push/pull even if skb_shared() */
99 __skb_push(skb
, skb
->mac_len
);
100 bpf_compute_data_pointers(skb
);
101 filter_res
= bpf_prog_run(prog
->filter
, skb
);
102 __skb_pull(skb
, skb
->mac_len
);
/* Run without the push/pull. NOTE(review): the branch keyword introducing
 * this arm is not visible — presumably the non-ingress else; confirm.
 */
104 bpf_compute_data_pointers(skb
);
105 filter_res
= bpf_prog_run(prog
->filter
, skb
);
/* A zero tstamp combined with a non-zero tstamp_type: reset the type to
 * SKB_CLOCK_REALTIME.
 */
107 if (unlikely(!skb
->tstamp
&& skb
->tstamp_type
))
108 skb
->tstamp_type
= SKB_CLOCK_REALTIME
;
/* Integrated actions: the class id is TC_H_MAJ() of the program's classid
 * OR'ed with the tc_classid currently stored in the skb control block, and
 * the filter result is translated by cls_bpf_exec_opcode().
 */
110 if (prog
->exts_integrated
) {
112 res
->classid
= TC_H_MAJ(prog
->res
.classid
) |
113 qdisc_skb_cb(skb
)->tc_classid
;
115 ret
= cls_bpf_exec_opcode(filter_res
);
116 if (ret
== TC_ACT_UNSPEC
)
/* A filter result other than -1 is used as the class id. */
123 if (filter_res
!= -1) {
125 res
->classid
= filter_res
;
/* Execute the extensions attached to this program. */
130 ret
= tcf_exts_exec(skb
, &prog
->exts
, res
);
140 static bool cls_bpf_is_ebpf(const struct cls_bpf_prog
*prog
)
142 return !prog
->bpf_ops
;
/*
 * Build a TC_CLSBPF_OFFLOAD request describing @prog (new) and @oldprog
 * (previous) and issue it on the tp's block through one of
 * tc_setup_cb_replace(), tc_setup_cb_add() or tc_setup_cb_destroy().
 *
 * NOTE(review): the conditions that select among the three calls, some
 * local declarations, trailing call arguments and the return statements
 * are not visible in this extract.
 */
145 static int cls_bpf_offload_cmd(struct tcf_proto
*tp
, struct cls_bpf_prog
*prog
,
146 struct cls_bpf_prog
*oldprog
,
147 struct netlink_ext_ack
*extack
)
149 struct tcf_block
*block
= tp
->chain
->block
;
150 struct tc_cls_bpf_offload cls_bpf
= {};
151 struct cls_bpf_prog
*obj
;
155 skip_sw
= prog
&& tc_skip_sw(prog
->gen_flags
);
/* Take common attributes from the new program when there is one, otherwise
 * from the old program.
 */
156 obj
= prog
?: oldprog
;
158 tc_cls_common_offload_init(&cls_bpf
.common
, tp
, obj
->gen_flags
, extack
);
159 cls_bpf
.command
= TC_CLSBPF_OFFLOAD
;
160 cls_bpf
.exts
= &obj
->exts
;
161 cls_bpf
.prog
= prog
? prog
->filter
: NULL
;
162 cls_bpf
.oldprog
= oldprog
? oldprog
->filter
: NULL
;
163 cls_bpf
.name
= obj
->bpf_name
;
164 cls_bpf
.exts_integrated
= obj
->exts_integrated
;
/* Replace: both the old and the new program's flags/counters are passed. */
167 err
= tc_setup_cb_replace(block
, tp
, TC_SETUP_CLSBPF
, &cls_bpf
,
168 skip_sw
, &oldprog
->gen_flags
,
169 &oldprog
->in_hw_count
,
170 &prog
->gen_flags
, &prog
->in_hw_count
,
/* Add: only the new program's flags/counter are passed. */
173 err
= tc_setup_cb_add(block
, tp
, TC_SETUP_CLSBPF
, &cls_bpf
,
174 skip_sw
, &prog
->gen_flags
,
175 &prog
->in_hw_count
, true);
/* Destroy: only the old program's flags/counter are passed. */
177 err
= tc_setup_cb_destroy(block
, tp
, TC_SETUP_CLSBPF
, &cls_bpf
,
178 skip_sw
, &oldprog
->gen_flags
,
179 &oldprog
->in_hw_count
, true);
/* Re-issue the command with the roles of prog and oldprog swapped.
 * NOTE(review): the guarding condition is not visible — presumably an error
 * rollback; confirm.
 */
182 cls_bpf_offload_cmd(tp
, oldprog
, prog
, extack
);
/* skip_sw was requested but the new program did not end up in hardware.
 * NOTE(review): the statement guarded by this test is not visible.
 */
186 if (prog
&& skip_sw
&& !(prog
->gen_flags
& TCA_CLS_FLAGS_IN_HW
))
192 static u32
cls_bpf_flags(u32 flags
)
194 return flags
& CLS_BPF_SUPPORTED_GEN_FLAGS
;
/*
 * Front end to cls_bpf_offload_cmd(): inspects the generic flags of @prog
 * and @oldprog before issuing the offload command.
 *
 * NOTE(review): the statements executed when each test below is true are
 * not visible in this extract.
 */
197 static int cls_bpf_offload(struct tcf_proto
*tp
, struct cls_bpf_prog
*prog
,
198 struct cls_bpf_prog
*oldprog
,
199 struct netlink_ext_ack
*extack
)
/* Both programs present but their supported generic flags differ. */
201 if (prog
&& oldprog
&&
202 cls_bpf_flags(prog
->gen_flags
) !=
203 cls_bpf_flags(oldprog
->gen_flags
))
/* New program asks to skip hardware. */
206 if (prog
&& tc_skip_hw(prog
->gen_flags
))
/* Old program was marked to skip hardware. */
208 if (oldprog
&& tc_skip_hw(oldprog
->gen_flags
))
/* Neither program is present. */
210 if (!prog
&& !oldprog
)
213 return cls_bpf_offload_cmd(tp
, prog
, oldprog
, extack
);
/*
 * Issue an offload command with no new program and @prog as the old
 * program, and report the resulting error code with pr_err().
 *
 * NOTE(review): the declaration of err and the condition guarding the
 * pr_err() call are not visible in this extract.
 */
216 static void cls_bpf_stop_offload(struct tcf_proto
*tp
,
217 struct cls_bpf_prog
*prog
,
218 struct netlink_ext_ack
*extack
)
222 err
= cls_bpf_offload_cmd(tp
, NULL
, prog
, extack
);
224 pr_err("Stopping hardware offload failed: %d\n", err
);
227 static void cls_bpf_offload_update_stats(struct tcf_proto
*tp
,
228 struct cls_bpf_prog
*prog
)
230 struct tcf_block
*block
= tp
->chain
->block
;
231 struct tc_cls_bpf_offload cls_bpf
= {};
233 tc_cls_common_offload_init(&cls_bpf
.common
, tp
, prog
->gen_flags
, NULL
);
234 cls_bpf
.command
= TC_CLSBPF_STATS
;
235 cls_bpf
.exts
= &prog
->exts
;
236 cls_bpf
.prog
= prog
->filter
;
237 cls_bpf
.name
= prog
->bpf_name
;
238 cls_bpf
.exts_integrated
= prog
->exts_integrated
;
240 tc_setup_cb_call(block
, TC_SETUP_CLSBPF
, &cls_bpf
, false, true);
/*
 * Classifier instance setup (installed as .init in cls_bpf_ops): allocate a
 * zeroed head, initialise its program list and handle IDR, and publish it
 * as tp->root.
 *
 * NOTE(review): allocation-failure handling and the return statement are
 * not visible in this extract.
 */
243 static int cls_bpf_init(struct tcf_proto
*tp
)
245 struct cls_bpf_head
*head
;
247 head
= kzalloc(sizeof(*head
), GFP_KERNEL
);
251 INIT_LIST_HEAD_RCU(&head
->plist
);
252 idr_init(&head
->handle_idr
);
253 rcu_assign_pointer(tp
->root
, head
);
/*
 * Release the filter program held by @prog and free its bpf_name and
 * bpf_ops buffers.
 *
 * bpf_prog_put() is used when cls_bpf_is_ebpf(prog) is true.
 * NOTE(review): the keyword introducing the bpf_prog_destroy() call is not
 * visible — presumably the else arm for classic BPF; confirm.
 */
258 static void cls_bpf_free_parms(struct cls_bpf_prog
*prog
)
260 if (cls_bpf_is_ebpf(prog
))
261 bpf_prog_put(prog
->filter
);
263 bpf_prog_destroy(prog
->filter
);
265 kfree(prog
->bpf_name
);
266 kfree(prog
->bpf_ops
);
/*
 * Tear down @prog: destroy its extensions, drop the net reference held via
 * the extensions, then release its program parameters.
 *
 * NOTE(review): any statements following cls_bpf_free_parms() are not
 * visible in this extract.
 */
269 static void __cls_bpf_delete_prog(struct cls_bpf_prog
*prog
)
271 tcf_exts_destroy(&prog
->exts
);
272 tcf_exts_put_net(&prog
->exts
);
274 cls_bpf_free_parms(prog
);
/*
 * Work handler queued through tcf_queue_work(): recovers the program from
 * the work item and calls __cls_bpf_delete_prog() on it.
 *
 * NOTE(review): the remaining container_of() arguments and any statements
 * surrounding the delete call are not visible in this extract.
 */
278 static void cls_bpf_delete_prog_work(struct work_struct
*work
)
280 struct cls_bpf_prog
*prog
= container_of(to_rcu_work(work
),
284 __cls_bpf_delete_prog(prog
);
/*
 * Detach @prog from @tp: release its handle from the IDR, stop hardware
 * offload, unlink it from the program list (RCU-aware) and unbind its
 * filter result.
 *
 * When tcf_exts_get_net() succeeds the final teardown is queued with
 * tcf_queue_work(); __cls_bpf_delete_prog() is the direct teardown call.
 * NOTE(review): the keyword joining the two teardown calls is not visible —
 * presumably an else arm; confirm.
 */
288 static void __cls_bpf_delete(struct tcf_proto
*tp
, struct cls_bpf_prog
*prog
,
289 struct netlink_ext_ack
*extack
)
291 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
293 idr_remove(&head
->handle_idr
, prog
->handle
);
294 cls_bpf_stop_offload(tp
, prog
, extack
);
295 list_del_rcu(&prog
->link
);
296 tcf_unbind_filter(tp
, &prog
->res
);
297 if (tcf_exts_get_net(&prog
->exts
))
298 tcf_queue_work(&prog
->rwork
, cls_bpf_delete_prog_work
);
300 __cls_bpf_delete_prog(prog
);
/*
 * Delete callback (installed as .delete in cls_bpf_ops): removes the
 * program passed as @arg and sets *@last to whether the program list is
 * now empty.
 *
 * NOTE(review): the return statement is not visible in this extract.
 */
303 static int cls_bpf_delete(struct tcf_proto
*tp
, void *arg
, bool *last
,
304 bool rtnl_held
, struct netlink_ext_ack
*extack
)
306 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
308 __cls_bpf_delete(tp
, arg
, extack
);
309 *last
= list_empty(&head
->plist
);
313 static void cls_bpf_destroy(struct tcf_proto
*tp
, bool rtnl_held
,
314 struct netlink_ext_ack
*extack
)
316 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
317 struct cls_bpf_prog
*prog
, *tmp
;
319 list_for_each_entry_safe(prog
, tmp
, &head
->plist
, link
)
320 __cls_bpf_delete(tp
, prog
, extack
);
322 idr_destroy(&head
->handle_idr
);
323 kfree_rcu(head
, rcu
);
/*
 * Search the head's program list for an entry whose handle equals @handle.
 *
 * NOTE(review): the return statements (match and no-match cases) are not
 * visible in this extract.
 */
326 static void *cls_bpf_get(struct tcf_proto
*tp
, u32 handle
)
328 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
329 struct cls_bpf_prog
*prog
;
331 list_for_each_entry(prog
, &head
->plist
, link
) {
332 if (prog
->handle
== handle
)
/*
 * Set up @prog from classic BPF instructions supplied in the
 * TCA_BPF_OPS_LEN / TCA_BPF_OPS attributes of @tb.
 *
 * The instruction count must be between 1 and BPF_MAXINSNS and the
 * attribute payload length must equal count * sizeof(struct sock_filter).
 * The instructions are duplicated with kmemdup() and handed to
 * bpf_prog_create(); the copy and count are stored in @prog.
 *
 * NOTE(review): error handling, some declarations (fp, ret), the
 * assignment of the created program and the return statements are not
 * visible in this extract.
 */
339 static int cls_bpf_prog_from_ops(struct nlattr
**tb
, struct cls_bpf_prog
*prog
)
341 struct sock_filter
*bpf_ops
;
342 struct sock_fprog_kern fprog_tmp
;
344 u16 bpf_size
, bpf_num_ops
;
347 bpf_num_ops
= nla_get_u16(tb
[TCA_BPF_OPS_LEN
]);
348 if (bpf_num_ops
> BPF_MAXINSNS
|| bpf_num_ops
== 0)
351 bpf_size
= bpf_num_ops
* sizeof(*bpf_ops
);
352 if (bpf_size
!= nla_len(tb
[TCA_BPF_OPS
]))
355 bpf_ops
= kmemdup(nla_data(tb
[TCA_BPF_OPS
]), bpf_size
, GFP_KERNEL
);
359 fprog_tmp
.len
= bpf_num_ops
;
360 fprog_tmp
.filter
= bpf_ops
;
362 ret
= bpf_prog_create(&fp
, &fprog_tmp
);
/* Keep the raw instructions; classic BPF programs carry no name. */
368 prog
->bpf_ops
= bpf_ops
;
369 prog
->bpf_num_ops
= bpf_num_ops
;
370 prog
->bpf_name
= NULL
;
/*
 * Set up @prog from an eBPF program referenced by the file descriptor in
 * the TCA_BPF_FD attribute of @tb.
 *
 * The program is looked up as BPF_PROG_TYPE_SCHED_CLS; the skip_sw bit of
 * @gen_flags is forwarded to bpf_prog_get_type_dev(). An optional
 * TCA_BPF_NAME attribute is duplicated and stored as the program name.
 * bpf_ops is set to NULL, which is what cls_bpf_is_ebpf() tests.
 *
 * NOTE(review): declarations, error handling, the assignment of the looked
 * up program, the condition guarding tcf_block_netif_keep_dst() and the
 * return statements are not visible in this extract.
 */
376 static int cls_bpf_prog_from_efd(struct nlattr
**tb
, struct cls_bpf_prog
*prog
,
377 u32 gen_flags
, const struct tcf_proto
*tp
)
384 bpf_fd
= nla_get_u32(tb
[TCA_BPF_FD
]);
385 skip_sw
= gen_flags
& TCA_CLS_FLAGS_SKIP_SW
;
387 fp
= bpf_prog_get_type_dev(bpf_fd
, BPF_PROG_TYPE_SCHED_CLS
, skip_sw
);
391 if (tb
[TCA_BPF_NAME
]) {
392 name
= nla_memdup(tb
[TCA_BPF_NAME
], GFP_KERNEL
);
399 prog
->bpf_ops
= NULL
;
400 prog
->bpf_name
= name
;
404 tcf_block_netif_keep_dst(tp
->chain
->block
);
/*
 * Change callback (installed as .change in cls_bpf_ops): create a new
 * program from the netlink attributes in @tca and either add it to the
 * head's list or replace the existing program passed in via *@arg.
 *
 * NOTE(review): many lines of this function (error checks after each call,
 * goto statements and labels, several branch keywords, some call
 * arguments and the return statements) are not visible in this extract;
 * comments below describe only what is shown.
 */
409 static int cls_bpf_change(struct net
*net
, struct sk_buff
*in_skb
,
410 struct tcf_proto
*tp
, unsigned long base
,
411 u32 handle
, struct nlattr
**tca
,
412 void **arg
, u32 flags
,
413 struct netlink_ext_ack
*extack
)
415 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
416 bool is_bpf
, is_ebpf
, have_exts
= false;
417 struct cls_bpf_prog
*oldprog
= *arg
;
418 struct nlattr
*tb
[TCA_BPF_MAX
+ 1];
419 bool bound_to_filter
= false;
420 struct cls_bpf_prog
*prog
;
/* The options attribute is checked for presence before parsing. */
424 if (tca
[TCA_OPTIONS
] == NULL
)
427 ret
= nla_parse_nested_deprecated(tb
, TCA_BPF_MAX
, tca
[TCA_OPTIONS
],
/* Allocate the new (zeroed) program and initialise its extensions. */
432 prog
= kzalloc(sizeof(*prog
), GFP_KERNEL
);
436 ret
= tcf_exts_init(&prog
->exts
, net
, TCA_BPF_ACT
, TCA_BPF_POLICE
);
/* A non-zero requested handle is compared with the existing program's.
 * NOTE(review): this test dereferences oldprog, which is NULL when no
 * program is being replaced; the guard that makes this safe is not visible
 * in this extract — confirm it exists.
 */
441 if (handle
&& oldprog
->handle
!= handle
) {
/* Reserve a handle in the IDR for the new program. */
449 ret
= idr_alloc_u32(&head
->handle_idr
, prog
, &handle
,
450 INT_MAX
, GFP_KERNEL
);
451 } else if (!oldprog
) {
452 ret
= idr_alloc_u32(&head
->handle_idr
, prog
, &handle
,
458 prog
->handle
= handle
;
/* Exactly one of classic BPF (OPS_LEN + OPS) or eBPF (FD) must be given. */
460 is_bpf
= tb
[TCA_BPF_OPS_LEN
] && tb
[TCA_BPF_OPS
];
461 is_ebpf
= tb
[TCA_BPF_FD
];
462 if ((!is_bpf
&& !is_ebpf
) || (is_bpf
&& is_ebpf
)) {
467 ret
= tcf_exts_validate(net
, tp
, tb
, tca
[TCA_RATE
], &prog
->exts
,
/* TCA_BPF_FLAGS: only TCA_BPF_FLAG_ACT_DIRECT is recognised; it selects
 * integrated actions (exts_integrated).
 */
472 if (tb
[TCA_BPF_FLAGS
]) {
473 u32 bpf_flags
= nla_get_u32(tb
[TCA_BPF_FLAGS
]);
475 if (bpf_flags
& ~TCA_BPF_FLAG_ACT_DIRECT
) {
480 have_exts
= bpf_flags
& TCA_BPF_FLAG_ACT_DIRECT
;
/* TCA_BPF_FLAGS_GEN: checked against the supported set and
 * tc_flags_valid().
 */
482 if (tb
[TCA_BPF_FLAGS_GEN
]) {
483 gen_flags
= nla_get_u32(tb
[TCA_BPF_FLAGS_GEN
]);
484 if (gen_flags
& ~CLS_BPF_SUPPORTED_GEN_FLAGS
||
485 !tc_flags_valid(gen_flags
)) {
491 prog
->exts_integrated
= have_exts
;
492 prog
->gen_flags
= gen_flags
;
/* Load the program from classic BPF ops or from an eBPF fd. */
494 ret
= is_bpf
? cls_bpf_prog_from_ops(tb
, prog
) :
495 cls_bpf_prog_from_efd(tb
, prog
, gen_flags
, tp
);
/* Optional class id: bind the filter and remember that it was bound. */
499 if (tb
[TCA_BPF_CLASSID
]) {
500 prog
->res
.classid
= nla_get_u32(tb
[TCA_BPF_CLASSID
]);
501 tcf_bind_filter(tp
, &prog
->res
, base
);
502 bound_to_filter
= true;
505 ret
= cls_bpf_offload(tp
, prog
, oldprog
, extack
);
/* Record explicitly when the program did not end up in hardware. */
509 if (!tc_in_hw(prog
->gen_flags
))
510 prog
->gen_flags
|= TCA_CLS_FLAGS_NOT_IN_HW
;
/* Swap the new program in for the old one in the IDR and the list, unbind
 * the old filter and queue the old program for deferred deletion.
 * NOTE(review): the enclosing condition is not visible — presumably taken
 * only when oldprog is set; confirm.
 */
513 idr_replace(&head
->handle_idr
, prog
, handle
);
514 list_replace_rcu(&oldprog
->link
, &prog
->link
);
515 tcf_unbind_filter(tp
, &oldprog
->res
);
516 tcf_exts_get_net(&oldprog
->exts
);
517 tcf_queue_work(&oldprog
->rwork
, cls_bpf_delete_prog_work
);
/* Insert the new program into the list. NOTE(review): presumably the arm
 * for the no-oldprog case; confirm.
 */
519 list_add_rcu(&prog
->link
, &head
->plist
);
/* Cleanup calls undoing the steps above. NOTE(review): the labels and
 * conditions around them are not visible — presumably the error-unwinding
 * path; confirm.
 */
527 tcf_unbind_filter(tp
, &prog
->res
);
528 cls_bpf_free_parms(prog
);
531 idr_remove(&head
->handle_idr
, prog
->handle
);
533 tcf_exts_destroy(&prog
->exts
);
/*
 * Emit the classic BPF description of @prog into the netlink message: the
 * instruction count as TCA_BPF_OPS_LEN, followed by the instruction array
 * copied into a reserved TCA_BPF_OPS attribute.
 *
 * NOTE(review): the second parameter, the nla declaration, failure checks
 * and the return statements are not visible in this extract.
 */
538 static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog
*prog
,
543 if (nla_put_u16(skb
, TCA_BPF_OPS_LEN
, prog
->bpf_num_ops
))
546 nla
= nla_reserve(skb
, TCA_BPF_OPS
, prog
->bpf_num_ops
*
547 sizeof(struct sock_filter
));
551 memcpy(nla_data(nla
), prog
->bpf_ops
, nla_len(nla
));
/*
 * Emit the eBPF description of @prog into the netlink message: the
 * optional name (TCA_BPF_NAME), the program id (TCA_BPF_ID) and the
 * program tag copied into a reserved TCA_BPF_TAG attribute.
 *
 * NOTE(review): the second parameter, the nla declaration, failure checks
 * and the return statements are not visible in this extract.
 */
556 static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog
*prog
,
561 if (prog
->bpf_name
&&
562 nla_put_string(skb
, TCA_BPF_NAME
, prog
->bpf_name
))
565 if (nla_put_u32(skb
, TCA_BPF_ID
, prog
->filter
->aux
->id
))
568 nla
= nla_reserve(skb
, TCA_BPF_TAG
, sizeof(prog
->filter
->tag
));
572 memcpy(nla_data(nla
), prog
->filter
->tag
, nla_len(nla
));
/*
 * Dump callback (installed as .dump in cls_bpf_ops): serialise the program
 * @fh into @skb.
 *
 * Sets tm->tcm_handle, asks the offload layer for a stats update, then
 * writes a nested TCA_OPTIONS attribute holding the class id (if non-zero),
 * the program description (eBPF or classic), the extensions, and the
 * TCA_BPF_FLAGS / TCA_BPF_FLAGS_GEN values (each only if non-zero). The
 * extension statistics are dumped after the nest is closed. On failure the
 * nest is cancelled at the nla_put_failure label.
 *
 * NOTE(review): local declarations, some conditions, the label definition
 * and the return statements are not visible in this extract.
 */
577 static int cls_bpf_dump(struct net
*net
, struct tcf_proto
*tp
, void *fh
,
578 struct sk_buff
*skb
, struct tcmsg
*tm
, bool rtnl_held
)
580 struct cls_bpf_prog
*prog
= fh
;
588 tm
->tcm_handle
= prog
->handle
;
590 cls_bpf_offload_update_stats(tp
, prog
);
592 nest
= nla_nest_start_noflag(skb
, TCA_OPTIONS
);
594 goto nla_put_failure
;
596 if (prog
->res
.classid
&&
597 nla_put_u32(skb
, TCA_BPF_CLASSID
, prog
->res
.classid
))
598 goto nla_put_failure
;
/* Program description: eBPF or classic BPF. NOTE(review): the else keyword
 * between the two calls is not visible.
 */
600 if (cls_bpf_is_ebpf(prog
))
601 ret
= cls_bpf_dump_ebpf_info(prog
, skb
);
603 ret
= cls_bpf_dump_bpf_info(prog
, skb
);
605 goto nla_put_failure
;
607 if (tcf_exts_dump(skb
, &prog
->exts
) < 0)
608 goto nla_put_failure
;
/* Report TCA_BPF_FLAG_ACT_DIRECT when actions are integrated. */
610 if (prog
->exts_integrated
)
611 bpf_flags
|= TCA_BPF_FLAG_ACT_DIRECT
;
612 if (bpf_flags
&& nla_put_u32(skb
, TCA_BPF_FLAGS
, bpf_flags
))
613 goto nla_put_failure
;
614 if (prog
->gen_flags
&&
615 nla_put_u32(skb
, TCA_BPF_FLAGS_GEN
, prog
->gen_flags
))
616 goto nla_put_failure
;
618 nla_nest_end(skb
, nest
);
620 if (tcf_exts_dump_stats(skb
, &prog
->exts
) < 0)
621 goto nla_put_failure
;
/* Failure path: drop the partially written nest. */
626 nla_nest_cancel(skb
, nest
);
630 static void cls_bpf_bind_class(void *fh
, u32 classid
, unsigned long cl
,
631 void *q
, unsigned long base
)
633 struct cls_bpf_prog
*prog
= fh
;
635 tc_cls_bind_class(classid
, cl
, q
, &prog
->res
, base
);
/*
 * Walk callback (installed as .walk in cls_bpf_ops): iterates over the
 * head's program list and passes each program to tc_cls_stats_dump().
 *
 * NOTE(review): the remainder of the parameter list and the statement
 * executed when tc_cls_stats_dump() returns false are not visible in this
 * extract.
 */
638 static void cls_bpf_walk(struct tcf_proto
*tp
, struct tcf_walker
*arg
,
641 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
642 struct cls_bpf_prog
*prog
;
644 list_for_each_entry(prog
, &head
->plist
, link
) {
645 if (!tc_cls_stats_dump(tp
, arg
, prog
))
/*
 * Reoffload callback (installed as .reoffload in cls_bpf_ops): for every
 * program on the head's list, build a TC_CLSBPF_OFFLOAD request and pass it
 * to tc_setup_cb_reoffload() with the supplied callback @cb / @cb_priv.
 *
 * When @add is true the program is presented as the new program (prog);
 * otherwise it is presented as the old program (oldprog).
 *
 * NOTE(review): the statement executed for programs with tc_skip_hw(), the
 * trailing call arguments, error handling and the return statements are
 * not visible in this extract.
 */
650 static int cls_bpf_reoffload(struct tcf_proto
*tp
, bool add
, flow_setup_cb_t
*cb
,
651 void *cb_priv
, struct netlink_ext_ack
*extack
)
653 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
654 struct tcf_block
*block
= tp
->chain
->block
;
655 struct tc_cls_bpf_offload cls_bpf
= {};
656 struct cls_bpf_prog
*prog
;
659 list_for_each_entry(prog
, &head
->plist
, link
) {
660 if (tc_skip_hw(prog
->gen_flags
))
663 tc_cls_common_offload_init(&cls_bpf
.common
, tp
, prog
->gen_flags
,
665 cls_bpf
.command
= TC_CLSBPF_OFFLOAD
;
666 cls_bpf
.exts
= &prog
->exts
;
667 cls_bpf
.prog
= add
? prog
->filter
: NULL
;
668 cls_bpf
.oldprog
= add
? NULL
: prog
->filter
;
669 cls_bpf
.name
= prog
->bpf_name
;
670 cls_bpf
.exts_integrated
= prog
->exts_integrated
;
672 err
= tc_setup_cb_reoffload(block
, tp
, add
, cb
, TC_SETUP_CLSBPF
,
673 &cls_bpf
, cb_priv
, &prog
->gen_flags
,
/*
 * Classifier operations table; registered by cls_bpf_init_mod() and
 * unregistered by cls_bpf_exit_mod().
 *
 * NOTE(review): the embedded numbering skips entries, so some initialisers
 * and the closing brace appear to be missing from this extract.
 */
682 static struct tcf_proto_ops cls_bpf_ops __read_mostly
= {
684 .owner
= THIS_MODULE
,
685 .classify
= cls_bpf_classify
,
686 .init
= cls_bpf_init
,
687 .destroy
= cls_bpf_destroy
,
689 .change
= cls_bpf_change
,
690 .delete = cls_bpf_delete
,
691 .walk
= cls_bpf_walk
,
692 .reoffload
= cls_bpf_reoffload
,
693 .dump
= cls_bpf_dump
,
694 .bind_class
= cls_bpf_bind_class
,
696 MODULE_ALIAS_NET_CLS("bpf");
698 static int __init
cls_bpf_init_mod(void)
700 return register_tcf_proto_ops(&cls_bpf_ops
);
703 static void __exit
cls_bpf_exit_mod(void)
705 unregister_tcf_proto_ops(&cls_bpf_ops
);
/* Hook the init/exit functions into module load and unload. */
module_init(cls_bpf_init_mod);
module_exit(cls_bpf_exit_mod);