pf_lana: release lock in sendmsg after ppe has finished
[ana-net.git] / src / fb_bpf.c
blob96b151fc52f1bb3d4575bca569d8f5dc9f92aef7
1 /*
2 * Lightweight Autonomic Network Architecture
4 * LANA Berkeley Packet Filter (BPF) module.
6 * Copyright 2011 Daniel Borkmann <dborkma@tik.ee.ethz.ch>,
7 * Swiss federal institute of technology (ETH Zurich)
8 * Subject to the GPL.
10 * To generate BPF's, do the following:
11 * 1. Install flex + bison
13 * 2. Download bpfc from http://netsniff-ng.org i.e.:
14 * cd /tmp
15 * git clone git://repo.or.cz/netsniff-ng.git
16 * cd netsniff-ng/src/bpfc/
17 * make && make install
19 * vim firstfilter
21 * ldh #proto
22 * jeq #0x800,L1,L2
23 * L1: ret #0xffffff
24 * L2: ret #0
26 * And finally cat the code into the fb's procfs file, e.g.
27 * bpfc firstfilter > /proc/net/lana/fblock/fb1
30 #include <linux/kernel.h>
31 #include <linux/module.h>
32 #include <linux/spinlock.h>
33 #include <linux/notifier.h>
34 #include <linux/rcupdate.h>
35 #include <linux/seqlock.h>
36 #include <linux/spinlock.h>
37 #include <linux/slab.h>
38 #include <linux/percpu.h>
39 #include <linux/prefetch.h>
40 #include <linux/filter.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/uaccess.h>
45 #include "xt_fblock.h"
46 #include "xt_builder.h"
47 #include "xt_idp.h"
48 #include "xt_skb.h"
49 #include "xt_engine.h"
50 #include "xt_builder.h"
/*
 * Per-CPU private state of one BPF functional block instance.
 * port[] and filter are protected by flock on each CPU; see
 * fb_bpf_netrx() and the filter (re)load/cleanup paths.
 */
struct fb_bpf_priv {
	idp_t port[2];			/* next-IDP binding per path direction */
	struct sk_filter *filter;	/* currently installed program, NULL if none */
	spinlock_t flock;		/* guards port[] and filter on this CPU */
};
/*
 * Kernel-side variant of struct sock_fprog: the instruction array lives
 * in kernel memory (filled by fb_bpf_proc_write()), not user memory.
 */
struct sock_fprog_kern {
	unsigned short len;		/* number of BPF instructions */
	struct sock_filter *filter;	/* instruction array (kernel memory) */
};
64 * Note:
65 * To use the BPF JIT compiler, you need to export symbols from
66 * /arch/x86/net/ so that they can be used from a module. Then,
67 * recompile your kernel with CONFIG_BPF_JIT=y and change symbols
68 * within this file from fb_bpf_jit_<x> to bpf_jit_<x> and the macro
69 * FB_SK_RUN_FILTER to SK_RUN_FILTER.
/* Stub JIT hook: no-op unless rewired to bpf_jit_compile (see note above). */
static inline void fb_bpf_jit_compile(struct sk_filter *fp)
{
}
/* Stub JIT hook: no-op unless rewired to bpf_jit_free (see note above). */
static inline void fb_bpf_jit_free(struct sk_filter *fp)
{
}
/*
 * Build and install a filter on one CPU's private data.
 *
 * Copies the instructions from @fprog into a freshly allocated sk_filter,
 * validates them with sk_chk_filter(), then swaps the new filter in under
 * flock.  Any previously installed filter is freed after the lock is
 * dropped.  Returns 0 on success or a negative errno.
 */
static int fb_bpf_init_filter(struct fb_bpf_priv __percpu *fb_priv_cpu,
			      struct sock_fprog_kern *fprog, unsigned int cpu)
{
	int err;
	struct sk_filter *sf, *sfold;
	unsigned int fsize;
	unsigned long flags;

	if (fprog->filter == NULL)
		return -EINVAL;

	fsize = sizeof(struct sock_filter) * fprog->len;

	/* Allocate on @cpu's NUMA node so the rx hot path stays local. */
	sf = kmalloc_node(fsize + sizeof(*sf), GFP_KERNEL, cpu_to_node(cpu));
	if (!sf)
		return -ENOMEM;

	memcpy(sf->insns, fprog->filter, fsize);
	atomic_set(&sf->refcnt, 1);
	sf->len = fprog->len;
	sf->bpf_func = sk_run_filter;	/* interpreter; JIT hook is stubbed out */

	err = sk_chk_filter(sf->insns, sf->len);
	if (err) {
		kfree(sf);
		return err;
	}

	fb_bpf_jit_compile(sf);

	/* Publish the new filter; free the old one outside the lock. */
	spin_lock_irqsave(&fb_priv_cpu->flock, flags);
	sfold = fb_priv_cpu->filter;
	fb_priv_cpu->filter = sf;
	spin_unlock_irqrestore(&fb_priv_cpu->flock, flags);

	if (sfold) {
		fb_bpf_jit_free(sfold);
		kfree(sfold);
	}

	return 0;
}
/*
 * Install @fprog on every online CPU of @fb.
 *
 * Stops at the first failing CPU; CPUs initialized before the failure keep
 * their filter until the caller tears them down again (fb_bpf_proc_write()
 * calls fb_bpf_cleanup_filter_cpus() on error).  Returns 0 or the first
 * per-CPU error.
 */
static int fb_bpf_init_filter_cpus(struct fblock *fb,
				   struct sock_fprog_kern *fprog)
{
	int err = 0;
	unsigned int cpu;
	struct fb_bpf_priv __percpu *fb_priv;

	if (!fprog || !fb)
		return -EINVAL;

	rcu_read_lock();
	fb_priv = (struct fb_bpf_priv __percpu *) rcu_dereference_raw(fb->private_data);
	rcu_read_unlock();

	/* Hold off CPU hotplug while walking the online mask. */
	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct fb_bpf_priv *fb_priv_cpu;
		fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
		err = fb_bpf_init_filter(fb_priv_cpu, fprog, cpu);
		if (err != 0) {
			printk(KERN_ERR "[%s::%s] fb_bpf_init_filter error: %d\n",
			       fb->name, fb->factory->type, err);
			break;
		}
	}
	put_online_cpus();

	return err;
}
/*
 * Detach and free this CPU's filter, if one is installed.  The pointer is
 * cleared under flock; the actual free happens after the lock is dropped.
 */
static void fb_bpf_cleanup_filter(struct fb_bpf_priv __percpu *fb_priv_cpu)
{
	unsigned long flags;
	struct sk_filter *sfold;

	spin_lock_irqsave(&fb_priv_cpu->flock, flags);
	sfold = fb_priv_cpu->filter;
	fb_priv_cpu->filter = NULL;
	spin_unlock_irqrestore(&fb_priv_cpu->flock, flags);

	if (sfold) {
		fb_bpf_jit_free(sfold);
		kfree(sfold);
	}
}
/* Tear down the installed filter on every online CPU of @fb. */
static void fb_bpf_cleanup_filter_cpus(struct fblock *fb)
{
	unsigned int cpu;
	struct fb_bpf_priv __percpu *fb_priv;

	if (!fb)
		return;

	rcu_read_lock();
	fb_priv = (struct fb_bpf_priv __percpu *) rcu_dereference_raw(fb->private_data);
	rcu_read_unlock();

	/* Hold off CPU hotplug while walking the online mask. */
	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct fb_bpf_priv *fb_priv_cpu;
		fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
		fb_bpf_cleanup_filter(fb_priv_cpu);
	}
	put_online_cpus();
}
/*
 * Packet receive hook for the PPE.
 *
 * Runs this CPU's BPF program over @skb (if one is loaded): a filter
 * return value smaller than the packet length drops the packet.
 * Accepted packets are tagged with the next IDP for direction *@dir;
 * if no port is bound in that direction the packet is dropped as well.
 * Returns PPE_SUCCESS or PPE_DROPPED (skb is consumed on drop).
 */
static int fb_bpf_netrx(const struct fblock * const fb,
			struct sk_buff * const skb,
			enum path_type * const dir)
{
	int drop = 0;
	unsigned int pkt_len;
	unsigned long flags;
	struct fb_bpf_priv __percpu *fb_priv_cpu;

	fb_priv_cpu = this_cpu_ptr(rcu_dereference_raw(fb->private_data));

	spin_lock_irqsave(&fb_priv_cpu->flock, flags);
	if (fb_priv_cpu->filter) {
		pkt_len = SK_RUN_FILTER(fb_priv_cpu->filter, skb);
		/* Unlike socket filters, a short return drops instead of
		 * trimming the packet. */
		if (pkt_len < skb->len) {
			spin_unlock_irqrestore(&fb_priv_cpu->flock, flags);
			kfree_skb(skb);
			return PPE_DROPPED;
		}
	}
	write_next_idp_to_skb(skb, fb->idp, fb_priv_cpu->port[*dir]);
	if (fb_priv_cpu->port[*dir] == IDP_UNKNOWN)
		drop = 1;
	spin_unlock_irqrestore(&fb_priv_cpu->flock, flags);
	/* Free outside the irq-disabled section. */
	if (drop) {
		kfree_skb(skb);
		return PPE_DROPPED;
	}
	return PPE_SUCCESS;
}
/*
 * fblock notifier callback: handles IDP bind/unbind requests for this
 * block by updating the per-CPU port[] slots.
 *
 * A bind only succeeds while the slot is IDP_UNKNOWN; an unbind only
 * while the slot holds the requested IDP.  On mismatch the loop stops
 * early and NOTIFY_BAD is returned, leaving already-updated CPUs as
 * they are.
 *
 * NOTE(review): plain spin_lock() is used here while the filter paths use
 * spin_lock_irqsave() on the same lock — presumably events never run from
 * (or race with) irq context on the same CPU; confirm against the fblock
 * notifier call chain.
 */
static int fb_bpf_event(struct notifier_block *self, unsigned long cmd,
			void *args)
{
	int ret = NOTIFY_OK;
	unsigned int cpu;
	struct fblock *fb;
	struct fb_bpf_priv __percpu *fb_priv;

	rcu_read_lock();
	fb = rcu_dereference_raw(container_of(self, struct fblock_notifier, nb)->self);
	fb_priv = (struct fb_bpf_priv __percpu *) rcu_dereference_raw(fb->private_data);
	rcu_read_unlock();

	switch (cmd) {
	case FBLOCK_BIND_IDP: {
		int bound = 0;
		struct fblock_bind_msg *msg = args;
		get_online_cpus();
		for_each_online_cpu(cpu) {
			struct fb_bpf_priv *fb_priv_cpu;
			fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
			spin_lock(&fb_priv_cpu->flock);
			if (fb_priv_cpu->port[msg->dir] == IDP_UNKNOWN) {
				fb_priv_cpu->port[msg->dir] = msg->idp;
				bound = 1;
			} else {
				ret = NOTIFY_BAD;
				spin_unlock(&fb_priv_cpu->flock);
				break;
			}
			spin_unlock(&fb_priv_cpu->flock);
		}
		put_online_cpus();
		if (bound)
			printk(KERN_INFO "[%s::%s] port %s bound to IDP%u\n",
			       fb->name, fb->factory->type,
			       path_names[msg->dir], msg->idp);
	} break;
	case FBLOCK_UNBIND_IDP: {
		int unbound = 0;
		struct fblock_bind_msg *msg = args;
		get_online_cpus();
		for_each_online_cpu(cpu) {
			struct fb_bpf_priv *fb_priv_cpu;
			fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
			spin_lock(&fb_priv_cpu->flock);
			if (fb_priv_cpu->port[msg->dir] == msg->idp) {
				fb_priv_cpu->port[msg->dir] = IDP_UNKNOWN;
				unbound = 1;
			} else {
				ret = NOTIFY_BAD;
				spin_unlock(&fb_priv_cpu->flock);
				break;
			}
			spin_unlock(&fb_priv_cpu->flock);
		}
		put_online_cpus();
		if (unbound)
			printk(KERN_INFO "[%s::%s] port %s unbound\n",
			       fb->name, fb->factory->type,
			       path_names[msg->dir]);
	} break;
	default:
		break;
	}

	return ret;
}
/*
 * seq_file show handler: dump the filter of the CPU the reader happens to
 * run on — whether JIT is active (bpf_func != sk_run_filter) and the
 * instructions in "{ code, jt, jf, k }" form, matching the write format.
 *
 * NOTE(review): only the current CPU's copy is shown; all CPUs are loaded
 * with the same program by fb_bpf_init_filter_cpus(), so this is
 * presumably representative.
 */
static int fb_bpf_proc_show_filter(struct seq_file *m, void *v)
{
	unsigned long flags;
	struct fblock *fb = (struct fblock *) m->private;
	struct fb_bpf_priv *fb_priv_cpu;
	struct sk_filter *sf;

	get_online_cpus();
	rcu_read_lock();
	fb_priv_cpu = this_cpu_ptr(rcu_dereference_raw(fb->private_data));
	rcu_read_unlock();

	spin_lock_irqsave(&fb_priv_cpu->flock, flags);
	sf = fb_priv_cpu->filter;
	if (sf) {
		unsigned int i;
		if (sf->bpf_func == sk_run_filter)
			seq_puts(m, "bpf jit: 0\n");
		else
			seq_puts(m, "bpf jit: 1\n");
		seq_puts(m, "code:\n");
		for (i = 0; i < sf->len; ++i) {
			char sline[32];
			memset(sline, 0, sizeof(sline));
			snprintf(sline, sizeof(sline),
				 "{ 0x%x, %u, %u, 0x%x }\n",
				 sf->insns[i].code,
				 sf->insns[i].jt,
				 sf->insns[i].jf,
				 sf->insns[i].k);
			sline[sizeof(sline) - 1] = 0;
			seq_puts(m, sline);
		}
	}
	spin_unlock_irqrestore(&fb_priv_cpu->flock, flags);
	put_online_cpus();

	return 0;
}
/* procfs open: single-shot seq_file, fblock pointer stashed in PDE data. */
static int fb_bpf_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, fb_bpf_proc_show_filter, PDE(inode)->data);
}
335 #define MAX_BUFF_SIZ 16384
336 #define MAX_INSTR_SIZ 512
338 static ssize_t fb_bpf_proc_write(struct file *file, const char __user * ubuff,
339 size_t count, loff_t * offset)
341 int i;
342 ssize_t ret = 0;
343 char *code, *ptr1, *ptr2;
344 size_t len = MAX_BUFF_SIZ;
345 struct sock_fprog_kern *fp;
346 struct fblock *fb = PDE(file->f_path.dentry->d_inode)->data;
348 if (count > MAX_BUFF_SIZ)
349 return -EINVAL;
350 if (count < MAX_BUFF_SIZ)
351 len = count;
353 code = kmalloc(len, GFP_KERNEL);
354 if (!code)
355 return -ENOMEM;
356 fp = kmalloc(sizeof(*fp), GFP_KERNEL);
357 if (!fp)
358 goto err;
359 fp->filter = kmalloc(MAX_INSTR_SIZ * sizeof(struct sock_filter), GFP_KERNEL);
360 if (!fp->filter)
361 goto err2;
362 memset(code, 0, len);
363 if (copy_from_user(code, ubuff, len)) {
364 ret = -EFAULT;
365 goto err3;
368 ptr1 = code;
369 ptr2 = NULL;
370 fp->len = 0;
372 while (fp->len < MAX_INSTR_SIZ && (char *) (code + len) > ptr1) {
373 while (ptr1 && (*ptr1 == ' ' || *ptr1 == '{'))
374 ptr1++;
375 fp->filter[fp->len].code = (__u16) simple_strtoul(ptr1, &ptr2, 16);
376 while (ptr2 && (*ptr2 == ' ' || *ptr2 == ','))
377 ptr2++;
378 fp->filter[fp->len].jt = (__u8) simple_strtoul(ptr2, &ptr1, 10);
379 while (ptr1 && (*ptr1 == ' ' || *ptr1 == ','))
380 ptr1++;
381 fp->filter[fp->len].jf = (__u8) simple_strtoul(ptr1, &ptr2, 10);
382 while (ptr2 && (*ptr2 == ' ' || *ptr2 == ','))
383 ptr2++;
384 fp->filter[fp->len].k = (__u32) simple_strtoul(ptr2, &ptr1, 16);
385 while (ptr1 && (*ptr1 == ' ' || *ptr1 == ',' || *ptr1 == '}' ||
386 *ptr1 == '\n'))
387 ptr1++;
388 fp->len++;
391 if (fp->len == MAX_INSTR_SIZ) {
392 printk(KERN_ERR "[%s::%s] Maximun instruction size exeeded!\n",
393 fb->name, fb->factory->type);
394 goto err3;
397 printk(KERN_ERR "[%s::%s] Parsed code:\n", fb->name, fb->factory->type);
398 for (i = 0; i < fp->len; ++i) {
399 printk(KERN_INFO "[%s::%s] %d: c:0x%x jt:%u jf:%u k:0x%x\n",
400 fb->name, fb->factory->type, i,
401 fp->filter[i].code, fp->filter[i].jt, fp->filter[i].jf,
402 fp->filter[i].k);
405 fb_bpf_cleanup_filter_cpus(fb);
406 ret = fb_bpf_init_filter_cpus(fb, fp);
407 if (!ret)
408 printk(KERN_INFO "[%s::%s] Filter injected!\n",
409 fb->name, fb->factory->type);
410 else {
411 printk(KERN_ERR "[%s::%s] Filter injection error: %ld!\n",
412 fb->name, fb->factory->type, ret);
413 fb_bpf_cleanup_filter_cpus(fb);
416 kfree(code);
417 kfree(fp->filter);
418 kfree(fp);
420 return count;
421 err3:
422 kfree(fp->filter);
423 err2:
424 kfree(fp);
425 err:
426 kfree(code);
427 return !ret ? -ENOMEM : ret;
/* procfs interface: read dumps the current program, write loads a new one. */
static const struct file_operations fb_bpf_proc_fops = {
	.owner = THIS_MODULE,
	.open = fb_bpf_proc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.write = fb_bpf_proc_write,
	.release = single_release,
};
/*
 * Constructor for a "bpf" fblock instance named @name.
 *
 * Allocates the block and its per-CPU private data (ports unbound, no
 * filter loaded), wires up the rx/event hooks, creates the procfs control
 * file and registers the block in the namespace.  Returns the new block
 * or NULL on failure; the goto ladder unwinds in strict reverse order of
 * acquisition.
 */
static struct fblock *fb_bpf_ctor(char *name)
{
	int ret = 0;
	unsigned int cpu;
	struct fblock *fb;
	struct fb_bpf_priv __percpu *fb_priv;
	struct proc_dir_entry *fb_proc;

	fb = alloc_fblock(GFP_ATOMIC);
	if (!fb)
		return NULL;

	fb_priv = alloc_percpu(struct fb_bpf_priv);
	if (!fb_priv)
		goto err;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct fb_bpf_priv *fb_priv_cpu;
		fb_priv_cpu = per_cpu_ptr(fb_priv, cpu);
		spin_lock_init(&fb_priv_cpu->flock);
		fb_priv_cpu->port[0] = IDP_UNKNOWN;
		fb_priv_cpu->port[1] = IDP_UNKNOWN;
		fb_priv_cpu->filter = NULL;
	}
	put_online_cpus();

	ret = init_fblock(fb, name, fb_priv);
	if (ret)
		goto err2;

	fb->netfb_rx = fb_bpf_netrx;
	fb->event_rx = fb_bpf_event;

	fb_proc = proc_create_data(fb->name, 0444, fblock_proc_dir,
				   &fb_bpf_proc_fops, (void *)(long) fb);
	if (!fb_proc)
		goto err3;

	ret = register_fblock_namespace(fb);
	if (ret)
		goto err4;

	__module_get(THIS_MODULE);

	return fb;
err4:
	remove_proc_entry(fb->name, fblock_proc_dir);
err3:
	cleanup_fblock_ctor(fb);
err2:
	free_percpu(fb_priv);
err:
	kfree_fblock(fb);
	return NULL;
}
/*
 * Destructor: release the per-CPU private data and the procfs entry.
 * NOTE(review): assumes the installed filters were already freed via
 * fb_bpf_dtor_outside_rcu() — confirm against the fblock core's teardown
 * ordering.
 */
static void fb_bpf_dtor(struct fblock *fb)
{
	free_percpu(rcu_dereference_raw(fb->private_data));
	remove_proc_entry(fb->name, fblock_proc_dir);
	module_put(THIS_MODULE);
}
/* Teardown that may sleep (get_online_cpus()) and therefore must run
 * outside RCU callback context: frees all per-CPU filters. */
static void fb_bpf_dtor_outside_rcu(struct fblock *fb)
{
	fb_bpf_cleanup_filter_cpus(fb);
}
/* Factory descriptor for the "bpf" fblock type, registered at module init. */
static struct fblock_factory fb_bpf_factory = {
	.type = "bpf",
	.mode = MODE_DUAL,
	.ctor = fb_bpf_ctor,
	.dtor = fb_bpf_dtor,
	.dtor_outside_rcu = fb_bpf_dtor_outside_rcu,
	.owner = THIS_MODULE,
};
/* Module init: make the "bpf" fblock type available to the builder. */
static int __init init_fb_bpf_module(void)
{
	return register_fblock_type(&fb_bpf_factory);
}
/* Module exit: wait out RCU readers, then unregister the factory. */
static void __exit cleanup_fb_bpf_module(void)
{
	synchronize_rcu();
	unregister_fblock_type(&fb_bpf_factory);
}
/* Module entry/exit registration and metadata. */
module_init(init_fb_bpf_module);
module_exit(cleanup_fb_bpf_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkma@tik.ee.ethz.ch>");
MODULE_DESCRIPTION("LANA Berkeley Packet Filter module");