2 * Berkeley Packet Filter based traffic classifier
4 * Might be used to classify traffic through flexible, user-defined and
5 * possibly JIT-ed BPF filters for traffic control as an alternative to
6 * ematches.
8 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
15 #include <linux/module.h>
16 #include <linux/types.h>
17 #include <linux/skbuff.h>
18 #include <linux/filter.h>
19 #include <linux/bpf.h>
21 #include <net/rtnetlink.h>
22 #include <net/pkt_cls.h>
/* Module metadata: license, author and one-line description. */
25 MODULE_LICENSE("GPL");
26 MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
27 MODULE_DESCRIPTION("TC BPF based classifier");
/* Length limit applied to the TCA_BPF_NAME string attribute in bpf_policy. */
29 #define CLS_BPF_NAME_LEN 256
/*
 * Member declarations of the classifier's private structures.
 * NOTE(review): the enclosing struct headers and several members used
 * elsewhere in this file (handle, exts, rcu, tp, bpf_fd, bpf_name,
 * bpf_num_ops, hgen) are not visible in this listing -- confirm against
 * the full file.
 */
/* Head of the program list; iterated as head->plist by classify/get/walk. */
32 struct list_head plist
;
/* Program executed through BPF_PROG_RUN() at classification time. */
38 struct bpf_prog
*filter
;
/* Linkage of one program into head->plist. */
39 struct list_head link
;
/* Classification result; res.classid is filled from TCA_BPF_CLASSID. */
40 struct tcf_result res
;
/*
 * Copy of the classic BPF instructions supplied by user space. Set to NULL
 * for eBPF programs, which is what cls_bpf_is_ebpf() tests.
 */
47 struct sock_filter
*bpf_ops
;
/*
 * Netlink attribute policy for the nested TCA_BPF_* options, passed to
 * nla_parse_nested() in cls_bpf_change():
 *   TCA_BPF_CLASSID  - u32
 *   TCA_BPF_FD       - u32
 *   TCA_BPF_NAME     - NUL-terminated string, length bound CLS_BPF_NAME_LEN
 *   TCA_BPF_OPS_LEN  - u16
 *   TCA_BPF_OPS      - binary blob, at most BPF_MAXINSNS sock_filter entries
 */
53 static const struct nla_policy bpf_policy
[TCA_BPF_MAX
+ 1] = {
54 [TCA_BPF_CLASSID
] = { .type
= NLA_U32
},
55 [TCA_BPF_FD
] = { .type
= NLA_U32
},
56 [TCA_BPF_NAME
] = { .type
= NLA_NUL_STRING
, .len
= CLS_BPF_NAME_LEN
},
57 [TCA_BPF_OPS_LEN
] = { .type
= NLA_U16
},
58 [TCA_BPF_OPS
] = { .type
= NLA_BINARY
,
59 .len
= sizeof(struct sock_filter
) * BPF_MAXINSNS
},
/*
 * Classify @skb against the programs attached to @tp.
 *
 * Visible steps: the head is fetched with rcu_dereference_bh(tp->root), the
 * program list head->plist is walked with the RCU list iterator, each
 * prog->filter is run over @skb with BPF_PROG_RUN(), filter_res is stored in
 * res->classid and the program's extensions are executed through
 * tcf_exts_exec(), whose result lands in ret.
 *
 * NOTE(review): this listing omits lines (the embedded numbering has gaps),
 * so the conditions guarding the visible statements, the declarations of
 * filter_res/ret and the return paths are not shown -- confirm against the
 * full file before relying on this summary.
 */
62 static int cls_bpf_classify(struct sk_buff
*skb
, const struct tcf_proto
*tp
,
63 struct tcf_result
*res
)
65 struct cls_bpf_head
*head
= rcu_dereference_bh(tp
->root
);
66 struct cls_bpf_prog
*prog
;
/*
 * at_ingress is derived from skb->tc_verd only when CONFIG_NET_CLS_ACT is
 * set. The second declaration (initialised to false) presumably belongs to
 * the #else branch, which is not visible here.
 */
67 #ifdef CONFIG_NET_CLS_ACT
68 bool at_ingress
= G_TC_AT(skb
->tc_verd
) & AT_INGRESS
;
70 bool at_ingress
= false;
/* Packets without a MAC header are special-cased; the action taken is not visible. */
74 if (unlikely(!skb_mac_header_was_set(skb
)))
77 /* Needed here for accessing maps. */
79 list_for_each_entry_rcu(prog
, &head
->plist
, link
) {
/*
 * First variant: skb->mac_len bytes are pushed back in front of the data
 * before the program runs and pulled off again afterwards. The second
 * BPF_PROG_RUN() below runs without that adjustment.
 * NOTE(review): presumably at_ingress selects between the two -- the
 * selecting condition is not visible in this listing.
 */
83 /* It is safe to push/pull even if skb_shared() */
84 __skb_push(skb
, skb
->mac_len
);
85 filter_res
= BPF_PROG_RUN(prog
->filter
, skb
);
86 __skb_pull(skb
, skb
->mac_len
);
88 filter_res
= BPF_PROG_RUN(prog
->filter
, skb
);
96 res
->classid
= filter_res
;
98 ret
= tcf_exts_exec(skb
, &prog
->exts
, res
);
/*
 * Tell whether @prog is an eBPF program.
 *
 * Returns true when prog->bpf_ops is NULL. cls_bpf_prog_from_efd() stores
 * NULL there, while cls_bpf_prog_from_ops() stores the copied classic BPF
 * instruction buffer.
 */
109 static bool cls_bpf_is_ebpf(const struct cls_bpf_prog
*prog
)
111 return !prog
->bpf_ops
;
/*
 * Set up the classifier state for @tp: allocate a zeroed cls_bpf_head with
 * GFP_KERNEL, initialise its (RCU) program list and publish it as tp->root
 * with rcu_assign_pointer().
 *
 * NOTE(review): the allocation-failure check and the return statements are
 * not visible in this listing.
 */
114 static int cls_bpf_init(struct tcf_proto
*tp
)
116 struct cls_bpf_head
*head
;
118 head
= kzalloc(sizeof(*head
), GFP_KERNEL
);
122 INIT_LIST_HEAD_RCU(&head
->plist
);
123 rcu_assign_pointer(tp
->root
, head
);
/*
 * Release the resources attached to @prog: its extensions, the BPF program
 * itself, the duplicated name and the copied classic instruction buffer.
 *
 * eBPF programs are dropped with bpf_prog_put(); the bpf_prog_destroy() call
 * that follows is presumably the else branch for classic programs (the
 * "else" line is not visible in this listing).
 * @tp is not referenced by any visible line.
 */
128 static void cls_bpf_delete_prog(struct tcf_proto
*tp
, struct cls_bpf_prog
*prog
)
130 tcf_exts_destroy(&prog
->exts
);
132 if (cls_bpf_is_ebpf(prog
))
133 bpf_prog_put(prog
->filter
);
135 bpf_prog_destroy(prog
->filter
);
/* kfree() accepts NULL, so both calls are fine for either program kind. */
137 kfree(prog
->bpf_name
);
138 kfree(prog
->bpf_ops
);
/*
 * RCU callback used with call_rcu(): recovers the cls_bpf_prog that embeds
 * @rcu via container_of() and hands it to cls_bpf_delete_prog() together
 * with the tcf_proto stored in prog->tp.
 */
142 static void __cls_bpf_delete_prog(struct rcu_head
*rcu
)
144 struct cls_bpf_prog
*prog
= container_of(rcu
, struct cls_bpf_prog
, rcu
);
146 cls_bpf_delete_prog(prog
->tp
, prog
);
/*
 * Remove a single program from @tp.
 *
 * @arg is the program pointer cast to unsigned long (the form returned by
 * cls_bpf_get()). The program is unlinked from the RCU list, its filter
 * result is unbound, and the actual freeing is deferred to
 * __cls_bpf_delete_prog() through call_rcu().
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
149 static int cls_bpf_delete(struct tcf_proto
*tp
, unsigned long arg
)
151 struct cls_bpf_prog
*prog
= (struct cls_bpf_prog
*) arg
;
153 list_del_rcu(&prog
->link
);
154 tcf_unbind_filter(tp
, &prog
->res
);
155 call_rcu(&prog
->rcu
, __cls_bpf_delete_prog
);
/*
 * Tear down all classifier state of @tp.
 *
 * When @force is false and the program list is not empty, the first check
 * fires (its consequence is not visible here -- presumably the destroy is
 * refused). Otherwise every program is unlinked, unbound and queued for
 * RCU-deferred freeing; tp->root is then cleared and the head is released
 * with kfree_rcu().
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
160 static bool cls_bpf_destroy(struct tcf_proto
*tp
, bool force
)
162 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
163 struct cls_bpf_prog
*prog
, *tmp
;
165 if (!force
&& !list_empty(&head
->plist
))
/* The _safe iterator is needed because entries are unlinked while walking. */
168 list_for_each_entry_safe(prog
, tmp
, &head
->plist
, link
) {
169 list_del_rcu(&prog
->link
);
170 tcf_unbind_filter(tp
, &prog
->res
);
171 call_rcu(&prog
->rcu
, __cls_bpf_delete_prog
);
174 RCU_INIT_POINTER(tp
->root
, NULL
);
175 kfree_rcu(head
, rcu
);
/*
 * Look up the program of @tp whose handle equals @handle.
 *
 * ret starts as 0UL and is set to the matching program's address (cast to
 * unsigned long) when one is found; cls_bpf_grab_new_handle() treats a
 * non-zero result as "handle in use".
 *
 * NOTE(review): the loop exit and return statement are not visible in this
 * listing.
 */
179 static unsigned long cls_bpf_get(struct tcf_proto
*tp
, u32 handle
)
181 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
182 struct cls_bpf_prog
*prog
;
183 unsigned long ret
= 0UL;
188 list_for_each_entry(prog
, &head
->plist
, link
) {
189 if (prog
->handle
== handle
) {
190 ret
= (unsigned long) prog
;
/*
 * Populate @prog from a classic BPF program supplied via the
 * TCA_BPF_OPS_LEN / TCA_BPF_OPS attributes in @tb.
 *
 * Visible steps:
 *  - read the instruction count and check it is non-zero and does not
 *    exceed BPF_MAXINSNS;
 *  - compute the byte size and compare it with the TCA_BPF_OPS payload
 *    length;
 *  - copy the instructions into a kzalloc()ed buffer;
 *  - build the runnable program with bpf_prog_create();
 *  - record the buffer, instruction count and @classid on @prog, with
 *    bpf_name set to NULL.
 *
 * NOTE(review): the error returns, the declarations of fp/ret and the
 * assignment of the created program to @prog are not visible in this
 * listing.
 */
198 static int cls_bpf_prog_from_ops(struct nlattr
**tb
,
199 struct cls_bpf_prog
*prog
, u32 classid
)
201 struct sock_filter
*bpf_ops
;
202 struct sock_fprog_kern fprog_tmp
;
204 u16 bpf_size
, bpf_num_ops
;
207 bpf_num_ops
= nla_get_u16(tb
[TCA_BPF_OPS_LEN
]);
208 if (bpf_num_ops
> BPF_MAXINSNS
|| bpf_num_ops
== 0)
/*
 * NOTE(review): bpf_size is a u16; this relies on
 * BPF_MAXINSNS * sizeof(struct sock_filter) fitting in 16 bits -- confirm.
 */
211 bpf_size
= bpf_num_ops
* sizeof(*bpf_ops
);
212 if (bpf_size
!= nla_len(tb
[TCA_BPF_OPS
]))
215 bpf_ops
= kzalloc(bpf_size
, GFP_KERNEL
);
219 memcpy(bpf_ops
, nla_data(tb
[TCA_BPF_OPS
]), bpf_size
);
221 fprog_tmp
.len
= bpf_num_ops
;
222 fprog_tmp
.filter
= bpf_ops
;
224 ret
= bpf_prog_create(&fp
, &fprog_tmp
);
/* A non-NULL bpf_ops is what marks the program as classic BPF. */
230 prog
->bpf_ops
= bpf_ops
;
231 prog
->bpf_num_ops
= bpf_num_ops
;
232 prog
->bpf_name
= NULL
;
235 prog
->res
.classid
= classid
;
/*
 * Populate @prog from an eBPF program referenced by the file descriptor in
 * TCA_BPF_FD.
 *
 * Visible steps: the descriptor is resolved with bpf_prog_get(), the program
 * type is compared against BPF_PROG_TYPE_SCHED_CLS, an optional
 * TCA_BPF_NAME is duplicated with kmemdup(), and @prog records the
 * descriptor, the name and @classid with bpf_ops set to NULL (the eBPF
 * marker tested by cls_bpf_is_ebpf()).
 *
 * NOTE(review): local declarations, the error paths (including what happens
 * on a type mismatch or a failed kmemdup) and the assignment of fp to @prog
 * are not visible in this listing.
 */
240 static int cls_bpf_prog_from_efd(struct nlattr
**tb
,
241 struct cls_bpf_prog
*prog
, u32 classid
)
247 bpf_fd
= nla_get_u32(tb
[TCA_BPF_FD
]);
249 fp
= bpf_prog_get(bpf_fd
);
253 if (fp
->type
!= BPF_PROG_TYPE_SCHED_CLS
) {
258 if (tb
[TCA_BPF_NAME
]) {
259 name
= kmemdup(nla_data(tb
[TCA_BPF_NAME
]),
260 nla_len(tb
[TCA_BPF_NAME
]),
268 prog
->bpf_ops
= NULL
;
269 prog
->bpf_fd
= bpf_fd
;
270 prog
->bpf_name
= name
;
273 prog
->res
.classid
= classid
;
/*
 * Validate the parsed options in @tb and fill in @prog accordingly.
 *
 * The request is treated as classic BPF when both TCA_BPF_OPS_LEN and
 * TCA_BPF_OPS are present, and as eBPF when TCA_BPF_FD is present. The first
 * check fires when neither or both kinds are given, or when TCA_BPF_CLASSID
 * is missing (its consequence is not visible here -- presumably the request
 * is rejected).
 *
 * Extensions are validated into a temporary tcf_exts, the program is loaded
 * through cls_bpf_prog_from_ops() or cls_bpf_prog_from_efd(), and finally the
 * filter result is bound and the temporary extensions are moved into
 * prog->exts with tcf_exts_change().
 *
 * NOTE(review): the tcf_exts_destroy(&exts) call is presumably on the
 * failure path of the program load -- the guarding condition and the return
 * statements are not visible in this listing.
 */
278 static int cls_bpf_modify_existing(struct net
*net
, struct tcf_proto
*tp
,
279 struct cls_bpf_prog
*prog
,
280 unsigned long base
, struct nlattr
**tb
,
281 struct nlattr
*est
, bool ovr
)
283 struct tcf_exts exts
;
284 bool is_bpf
, is_ebpf
;
288 is_bpf
= tb
[TCA_BPF_OPS_LEN
] && tb
[TCA_BPF_OPS
];
289 is_ebpf
= tb
[TCA_BPF_FD
];
291 if ((!is_bpf
&& !is_ebpf
) || (is_bpf
&& is_ebpf
) ||
292 !tb
[TCA_BPF_CLASSID
])
295 tcf_exts_init(&exts
, TCA_BPF_ACT
, TCA_BPF_POLICE
);
296 ret
= tcf_exts_validate(net
, tp
, tb
, est
, &exts
, ovr
);
300 classid
= nla_get_u32(tb
[TCA_BPF_CLASSID
]);
302 ret
= is_bpf
? cls_bpf_prog_from_ops(tb
, prog
, classid
) :
303 cls_bpf_prog_from_efd(tb
, prog
, classid
);
305 tcf_exts_destroy(&exts
);
309 tcf_bind_filter(tp
, &prog
->res
, base
);
310 tcf_exts_change(tp
, &prog
->exts
, &exts
);
/*
 * Pick a handle that is not yet used by any program of @tp.
 *
 * head->hgen is advanced on every iteration and probed with cls_bpf_get();
 * the loop stops when the candidate is free or after 0x80000000 attempts
 * (counter i reaching 0), in which case an error is logged.
 *
 * NOTE(review): what happens when hgen reaches 0x7FFFFFFF (presumably a
 * wrap-around) and the return statements are not visible in this listing.
 */
315 static u32
cls_bpf_grab_new_handle(struct tcf_proto
*tp
,
316 struct cls_bpf_head
*head
)
318 unsigned int i
= 0x80000000;
322 if (++head
->hgen
== 0x7FFFFFFF)
324 } while (--i
> 0 && cls_bpf_get(tp
, head
->hgen
));
326 if (unlikely(i
== 0)) {
327 pr_err("Insufficient number of handles\n");
/*
 * Create a new program on @tp or replace an existing one.
 *
 * *@arg carries the existing program on entry (cast to unsigned long) and is
 * set to the newly installed program on success.
 *
 * Visible steps:
 *  - require TCA_OPTIONS and parse the nested attributes against bpf_policy;
 *  - allocate a zeroed cls_bpf_prog and initialise its extensions;
 *  - choose the handle: either a freshly generated one from
 *    cls_bpf_grab_new_handle() or the caller's @handle, with a check for a
 *    resulting handle of 0;
 *  - fill the program in through cls_bpf_modify_existing();
 *  - publish it: list_replace_rcu() over the old entry (which is then
 *    unbound and freed after a grace period), or list_add_rcu() for a new
 *    entry.
 *
 * NOTE(review): the conditions selecting between these alternatives, the
 * error paths and the return statements are not visible in this listing.
 */
336 static int cls_bpf_change(struct net
*net
, struct sk_buff
*in_skb
,
337 struct tcf_proto
*tp
, unsigned long base
,
338 u32 handle
, struct nlattr
**tca
,
339 unsigned long *arg
, bool ovr
)
341 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
342 struct cls_bpf_prog
*oldprog
= (struct cls_bpf_prog
*) *arg
;
343 struct nlattr
*tb
[TCA_BPF_MAX
+ 1];
344 struct cls_bpf_prog
*prog
;
347 if (tca
[TCA_OPTIONS
] == NULL
)
350 ret
= nla_parse_nested(tb
, TCA_BPF_MAX
, tca
[TCA_OPTIONS
], bpf_policy
);
354 prog
= kzalloc(sizeof(*prog
), GFP_KERNEL
);
358 tcf_exts_init(&prog
->exts
, TCA_BPF_ACT
, TCA_BPF_POLICE
);
/*
 * NOTE(review): oldprog is dereferenced here and may be NULL when a new
 * filter is being created; the NULL guard is not visible in this listing --
 * confirm it exists in the full file.
 */
361 if (handle
&& oldprog
->handle
!= handle
) {
368 prog
->handle
= cls_bpf_grab_new_handle(tp
, head
);
370 prog
->handle
= handle
;
371 if (prog
->handle
== 0) {
376 ret
= cls_bpf_modify_existing(net
, tp
, prog
, base
, tb
, tca
[TCA_RATE
], ovr
);
/* Replacement path: swap the list entry, then retire the old program via RCU. */
381 list_replace_rcu(&prog
->link
, &oldprog
->link
);
382 tcf_unbind_filter(tp
, &oldprog
->res
);
383 call_rcu(&oldprog
->rcu
, __cls_bpf_delete_prog
);
/* Creation path: link the new program into the head's list. */
385 list_add_rcu(&prog
->link
, &head
->plist
);
388 *arg
= (unsigned long) prog
;
/*
 * Emit the classic BPF description of @prog into the netlink message:
 * TCA_BPF_OPS_LEN with the instruction count, then a TCA_BPF_OPS attribute
 * reserved for bpf_num_ops sock_filter entries and filled by copying
 * prog->bpf_ops.
 *
 * NOTE(review): the remaining parameter(s), the failure handling for
 * nla_put_u16()/nla_reserve() and the return statements are not visible in
 * this listing.
 */
396 static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog
*prog
,
401 if (nla_put_u16(skb
, TCA_BPF_OPS_LEN
, prog
->bpf_num_ops
))
404 nla
= nla_reserve(skb
, TCA_BPF_OPS
, prog
->bpf_num_ops
*
405 sizeof(struct sock_filter
));
409 memcpy(nla_data(nla
), prog
->bpf_ops
, nla_len(nla
));
/*
 * Emit the eBPF description of @prog into the netlink message: TCA_BPF_FD
 * with the recorded descriptor number and, when a name was stored,
 * TCA_BPF_NAME.
 *
 * NOTE(review): the remaining parameter(s) and the return statements are
 * not visible in this listing.
 */
414 static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog
*prog
,
417 if (nla_put_u32(skb
, TCA_BPF_FD
, prog
->bpf_fd
))
420 if (prog
->bpf_name
&&
421 nla_put_string(skb
, TCA_BPF_NAME
, prog
->bpf_name
))
/*
 * Dump the program identified by @fh (a cls_bpf_prog pointer cast to
 * unsigned long) into @skb.
 *
 * The handle is written to tm->tcm_handle; a TCA_OPTIONS nest then receives
 * the class id, the program description (eBPF or classic, chosen with
 * cls_bpf_is_ebpf()) and the extensions. Extension statistics are dumped
 * after the nest is closed. Failures jump to nla_put_failure, where the
 * nest is cancelled.
 *
 * NOTE(review): the conditions in front of two of the gotos, the "else"
 * between the two dump helpers, the label itself and the return statements
 * are not visible in this listing.
 */
427 static int cls_bpf_dump(struct net
*net
, struct tcf_proto
*tp
, unsigned long fh
,
428 struct sk_buff
*skb
, struct tcmsg
*tm
)
430 struct cls_bpf_prog
*prog
= (struct cls_bpf_prog
*) fh
;
437 tm
->tcm_handle
= prog
->handle
;
439 nest
= nla_nest_start(skb
, TCA_OPTIONS
);
441 goto nla_put_failure
;
443 if (nla_put_u32(skb
, TCA_BPF_CLASSID
, prog
->res
.classid
))
444 goto nla_put_failure
;
446 if (cls_bpf_is_ebpf(prog
))
447 ret
= cls_bpf_dump_ebpf_info(prog
, skb
);
449 ret
= cls_bpf_dump_bpf_info(prog
, skb
);
451 goto nla_put_failure
;
453 if (tcf_exts_dump(skb
, &prog
->exts
) < 0)
454 goto nla_put_failure
;
456 nla_nest_end(skb
, nest
);
458 if (tcf_exts_dump_stats(skb
, &prog
->exts
) < 0)
459 goto nla_put_failure
;
/* Failure path: discard everything added since nla_nest_start(). */
464 nla_nest_cancel(skb
, nest
);
/*
 * Iterate over all programs of @tp on behalf of the walker @arg.
 *
 * For each program, arg->count is compared with arg->skip, and arg->fn() is
 * invoked with the program pointer cast to unsigned long; a negative result
 * from the callback is special-cased.
 *
 * NOTE(review): the bodies of both conditionals (presumably "skip this
 * entry" and "stop the walk") and the count update are not visible in this
 * listing.
 */
468 static void cls_bpf_walk(struct tcf_proto
*tp
, struct tcf_walker
*arg
)
470 struct cls_bpf_head
*head
= rtnl_dereference(tp
->root
);
471 struct cls_bpf_prog
*prog
;
473 list_for_each_entry(prog
, &head
->plist
, link
) {
474 if (arg
->count
< arg
->skip
)
476 if (arg
->fn(tp
, (unsigned long) prog
, arg
) < 0) {
/*
 * Classifier operations table; registered in cls_bpf_init_mod() and
 * unregistered in cls_bpf_exit_mod().
 *
 * NOTE(review): the embedded numbering skips entries between .owner and
 * .classify and between .destroy and .change, so this listing may not show
 * every initialiser (cls_bpf_get(), for instance, is not wired up in any
 * visible line) -- confirm against the full file.
 */
485 static struct tcf_proto_ops cls_bpf_ops __read_mostly
= {
487 .owner
= THIS_MODULE
,
488 .classify
= cls_bpf_classify
,
489 .init
= cls_bpf_init
,
490 .destroy
= cls_bpf_destroy
,
492 .change
= cls_bpf_change
,
493 .delete = cls_bpf_delete
,
494 .walk
= cls_bpf_walk
,
495 .dump
= cls_bpf_dump
,
/*
 * Module entry point: registers cls_bpf_ops with register_tcf_proto_ops()
 * and returns that call's result.
 */
498 static int __init
cls_bpf_init_mod(void)
500 return register_tcf_proto_ops(&cls_bpf_ops
);
/* Module exit point: unregisters cls_bpf_ops. */
503 static void __exit
cls_bpf_exit_mod(void)
505 unregister_tcf_proto_ops(&cls_bpf_ops
);
/* Hook the init/exit functions above into module load and unload. */
508 module_init(cls_bpf_init_mod
);
509 module_exit(cls_bpf_exit_mod
);