/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

static HLIST_HEAD(nf_ct_userspace_expect_list);

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
                                u32 pid, int report)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);

        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        hlist_del_rcu(&exp->hnode);
        net->ct.expect_count--;

        hlist_del(&exp->lnode);
        if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
                master_help->expecting[exp->class]--;

        nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
        nf_ct_expect_put(exp);

        NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);

static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        spin_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_put(exp);
}

static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
        unsigned int hash;

        if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
                get_random_bytes(&nf_ct_expect_hash_rnd,
                                 sizeof(nf_ct_expect_hash_rnd));
                nf_ct_expect_hash_rnd_initted = 1;
        }

        hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
                       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
        return ((u64)hash * nf_ct_expect_hsize) >> 32;
}
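
/*
 * The return statement above maps the uniform 32-bit jhash value onto
 * [0, nf_ct_expect_hsize) by multiply-and-shift rather than the usual
 * "hash % hsize", avoiding an integer division.  A minimal userspace
 * sketch of the same trick (bucket_of() is illustrative, not a kernel
 * symbol):
 *
 *      static uint32_t bucket_of(uint32_t hash, uint32_t nbuckets)
 *      {
 *              /* hash/2^32 is uniform in [0, 1), so hash * nbuckets / 2^32
 *               * is uniform over the buckets, like a modulo reduction. *​/
 *              return ((uint64_t)hash * nbuckets) >> 32;
 *      }
 */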

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
                    const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
                    nf_ct_zone(i->master) == zone)
                        return i;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
                      const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        rcu_read_lock();
        i = __nf_ct_expect_find(net, zone, tuple);
        if (i && !atomic_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();

        return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
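
/*
 * Note that atomic_inc_not_zero() is what makes this lockless lookup
 * safe: another CPU may drop the last reference while we hold only the
 * RCU read lock.  In that case the refcount has already reached zero,
 * the increment is refused, and NULL is returned rather than a pointer
 * to an object about to be freed by nf_ct_expect_free_rcu() below.
 */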

/* If an expectation for this connection is found, it gets deleted from
 * the global list and then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
                       const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i, *exp = NULL;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
                    nf_ct_zone(i->master) == zone) {
                        exp = i;
                        break;
                }
        }
        if (!exp)
                return NULL;

        /* If master is not in hash table yet (ie. packet hasn't left
           this machine yet), how can other end know about expected?
           Hence these are not the droids you are looking for (if
           master ct never got confirmed, we'd hold a reference to it
           and weird things would happen to future packets). */
        if (!nf_ct_is_confirmed(exp->master))
                return NULL;

        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
                atomic_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                return exp;
        }

        return NULL;
}
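
/*
 * The two success paths above differ in ownership: a PERMANENT
 * expectation stays in the table, so the caller gets an additional
 * reference via atomic_inc().  A normal expectation is consumed; when
 * del_timer() wins the race against expiry, the timer's reference is
 * handed over to the caller together with the unlinked object.
 */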

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conn_help *help = nfct_help(ct);
        struct nf_conntrack_expect *exp;
        struct hlist_node *n, *next;

        /* Optimization: most connections never expect any others. */
        if (!help)
                return;

        hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* Part covered by intersection of masks must be unequal,
           otherwise they clash */
        struct nf_conntrack_tuple_mask intersect_mask;
        int count;

        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master && a->class == b->class &&
               nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
               nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
               nf_ct_zone(a->master) == nf_ct_zone(b->master);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
        spin_lock_bh(&nf_conntrack_lock);
        if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                nf_ct_expect_put(exp);
        }
        spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
        if (!new)
                return NULL;

        new->master = me;
        atomic_set(&new->use, 1);
        return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
                       u_int8_t family,
                       const union nf_inet_addr *saddr,
                       const union nf_inet_addr *daddr,
                       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
        int len;

        if (family == AF_INET)
                len = 4;
        else
                len = 16;

        exp->flags = 0;
        exp->class = class;
        exp->expectfn = NULL;
        exp->helper = NULL;
        exp->tuple.src.l3num = family;
        exp->tuple.dst.protonum = proto;

        if (saddr) {
                memcpy(&exp->tuple.src.u3, saddr, len);
                if (sizeof(exp->tuple.src.u3) > len)
                        /* address needs to be cleared for nf_ct_tuple_equal */
                        memset((void *)&exp->tuple.src.u3 + len, 0x00,
                               sizeof(exp->tuple.src.u3) - len);
                memset(&exp->mask.src.u3, 0xFF, len);
                if (sizeof(exp->mask.src.u3) > len)
                        memset((void *)&exp->mask.src.u3 + len, 0x00,
                               sizeof(exp->mask.src.u3) - len);
        } else {
                memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
                memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
        }

        if (src) {
                exp->tuple.src.u.all = *src;
                exp->mask.src.u.all = htons(0xFFFF);
        } else {
                exp->tuple.src.u.all = 0;
                exp->mask.src.u.all = 0;
        }

        memcpy(&exp->tuple.dst.u3, daddr, len);
        if (sizeof(exp->tuple.dst.u3) > len)
                /* address needs to be cleared for nf_ct_tuple_equal */
                memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                       sizeof(exp->tuple.dst.u3) - len);

        exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
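
/*
 * Typical usage, sketched from a conntrack helper's point of view; the
 * names ct, daddr and port are illustrative placeholders for values the
 * helper parsed from the control connection:
 *
 *      struct nf_conntrack_expect *exp;
 *
 *      exp = nf_ct_expect_alloc(ct);
 *      if (exp == NULL)
 *              return NF_DROP;
 *      nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *                        NULL, daddr, IPPROTO_TCP, NULL, &port);
 *      if (nf_ct_expect_related(exp) != 0)
 *              ret = NF_DROP;
 *      nf_ct_expect_put(exp);
 *
 * nf_ct_expect_related() is the no-report wrapper (declared in
 * nf_conntrack_expect.h) around nf_ct_expect_related_report() defined
 * later in this file; the final nf_ct_expect_put() drops the reference
 * taken by nf_ct_expect_alloc().
 */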

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
        struct nf_conntrack_expect *exp;

        exp = container_of(head, struct nf_conntrack_expect, rcu);
        kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);
        const struct nf_conntrack_expect_policy *p;
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

        atomic_inc(&exp->use);

        if (master_help) {
                hlist_add_head(&exp->lnode, &master_help->expectations);
                master_help->expecting[exp->class]++;
        } else if (exp->flags & NF_CT_EXPECT_USERSPACE)
                hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);

        hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
        net->ct.expect_count++;

        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        if (master_help) {
                p = &master_help->helper->expect_policy[exp->class];
                exp->timeout.expires = jiffies + p->timeout * HZ;
        }
        add_timer(&exp->timeout);

        atomic_inc(&exp->use);
        NF_CT_STAT_INC(net, expect_create);
}
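
/*
 * The two atomic_inc() calls above account for the two new users of the
 * expectation: one reference is held by the hash table/helper list
 * linkage (dropped in nf_ct_unlink_expect_report()), the other by the
 * pending timeout timer (dropped in nf_ct_expectation_timed_out()).
 */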

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
                                struct nf_conntrack_expect *new)
{
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_expect *exp, *last = NULL;
        struct hlist_node *n;

        hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
                if (exp->class == new->class)
                        last = exp;
        }

        if (last && del_timer(&last->timeout)) {
                nf_ct_unlink_expect(last);
                nf_ct_expect_put(last);
        }
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);
        const struct nf_conntrack_expect_policy *p;

        if (!del_timer(&i->timeout))
                return 0;

        p = &master_help->helper->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
}

static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
        const struct nf_conntrack_expect_policy *p;
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        struct net *net = nf_ct_exp_net(expect);
        struct hlist_node *n;
        unsigned int h;
        int ret = 1;

        /* Don't allow expectations created from kernel-space with no helper */
        if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
            (!master_help || (master_help && !master_help->helper))) {
                ret = -ESHUTDOWN;
                goto out;
        }
        h = nf_ct_expect_dst_hash(&expect->tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore.. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will it be over the limit? */
        if (master_help) {
                p = &master_help->helper->expect_policy[expect->class];
                if (p->max_expected &&
                    master_help->expecting[expect->class] >= p->max_expected) {
                        evict_oldest_expect(master, expect);
                        if (master_help->expecting[expect->class]
                            >= p->max_expected) {
                                ret = -EMFILE;
                                goto out;
                        }
                }
        }

        if (net->ct.expect_count >= nf_ct_expect_max) {
                if (net_ratelimit())
                        printk(KERN_WARNING
                               "nf_conntrack: expectation table full\n");
                ret = -EMFILE;
        }
out:
        return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
                                u32 pid, int report)
{
        int ret;

        spin_lock_bh(&nf_conntrack_lock);
        ret = __nf_ct_expect_check(expect);
        if (ret <= 0)
                goto out;

        ret = 0;
        nf_ct_expect_insert(expect);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
        return ret;
out:
        spin_unlock_bh(&nf_conntrack_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

void nf_ct_remove_userspace_expectations(void)
{
        struct nf_conntrack_expect *exp;
        struct hlist_node *n, *next;

        hlist_for_each_entry_safe(exp, n, next,
                                  &nf_ct_userspace_expect_list, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);

#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
        struct seq_net_private p;
        unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
        struct hlist_node *n;

        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
                n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
                if (n)
                        return n;
        }
        return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
                                             struct hlist_node *head)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;

        head = rcu_dereference(head->next);
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
                head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
        }
        return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
        struct hlist_node *head = ct_expect_get_first(seq);

        if (head)
                while (pos && (head = ct_expect_get_next(seq, head)))
                        pos--;
        return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
{
        rcu_read_lock();
        return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
        struct nf_conntrack_expect *expect;
        struct nf_conntrack_helper *helper;
        struct hlist_node *n = v;
        char *delim = "";

        expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

        if (expect->timeout.function)
                seq_printf(s, "%ld ", timer_pending(&expect->timeout)
                           ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
        else
                seq_printf(s, "- ");
        seq_printf(s, "l3proto = %u proto=%u ",
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
                    __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                         expect->tuple.dst.protonum));

        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_printf(s, "PERMANENT");
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_INACTIVE) {
                seq_printf(s, "%sINACTIVE", delim);
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_USERSPACE)
                seq_printf(s, "%sUSERSPACE", delim);

        helper = rcu_dereference(nfct_help(expect->master)->helper);
        if (helper) {
                seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
                if (helper->expect_policy[expect->class].name)
                        seq_printf(s, "/%s",
                                   helper->expect_policy[expect->class].name);
        }

        return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
        .start = exp_seq_start,
        .next = exp_seq_next,
        .stop = exp_seq_stop,
        .show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &exp_seq_ops,
                            sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
        .owner   = THIS_MODULE,
        .open    = exp_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
        struct proc_dir_entry *proc;

        proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
        if (!proc)
                return -ENOMEM;
#endif /* CONFIG_PROC_FS */
        return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
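
/*
 * nf_ct_expect_hsize is therefore tunable at load time, e.g.
 * "modprobe nf_conntrack expect_hashsize=2048" (assuming a modular
 * build).  The 0400 mode exposes it read-only afterwards under
 * /sys/module/nf_conntrack/parameters/expect_hashsize.
 */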

int nf_conntrack_expect_init(struct net *net)
{
        int err = -ENOMEM;

        if (net_eq(net, &init_net)) {
                if (!nf_ct_expect_hsize) {
                        nf_ct_expect_hsize = net->ct.htable_size / 256;
                        if (!nf_ct_expect_hsize)
                                nf_ct_expect_hsize = 1;
                }
                nf_ct_expect_max = nf_ct_expect_hsize * 4;
        }

        net->ct.expect_count = 0;
        net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
                                                    &net->ct.expect_vmalloc, 0);
        if (net->ct.expect_hash == NULL)
                goto err1;

        if (net_eq(net, &init_net)) {
                nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL);
                if (!nf_ct_expect_cachep)
                        goto err2;
        }

        err = exp_proc_init(net);
        if (err < 0)
                goto err3;

        return 0;

err3:
        if (net_eq(net, &init_net))
                kmem_cache_destroy(nf_ct_expect_cachep);
err2:
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
err1:
        return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
        exp_proc_remove(net);
        if (net_eq(net, &init_net)) {
                rcu_barrier(); /* Wait for call_rcu() before destroy */
                kmem_cache_destroy(nf_ct_expect_cachep);
        }
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
}