1 /*
2 * Generic address resolution entity
4 * Authors:
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
13 * Fixes:
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
15 * Harald Welte Add neighbour cache statistics like rtstat
16 */
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
42 #define DEBUG
43 #define NEIGH_DEBUG 1
44 #define neigh_dbg(level, fmt, ...) \
45 do { \
46 if (level <= NEIGH_DEBUG) \
47 pr_debug(fmt, ##__VA_ARGS__); \
48 } while (0)
50 #define PNEIGH_HASHMASK 0xF
52 static void neigh_timer_handler(unsigned long arg);
53 static void __neigh_notify(struct neighbour *n, int type, int flags);
54 static void neigh_update_notify(struct neighbour *neigh);
55 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
57 static struct neigh_table *neigh_tables;
58 #ifdef CONFIG_PROC_FS
59 static const struct file_operations neigh_stat_seq_fops;
60 #endif
62 /*
63 Neighbour hash table buckets are protected with rwlock tbl->lock.
65 - All the scans/updates to hash buckets MUST be made under this lock.
66 - NOTHING clever should be made under this lock: no callbacks
67 to protocol backends, no attempts to send something to network.
68 It will result in deadlocks, if backend/driver wants to use neighbour
69 cache.
70 - If the entry requires some non-trivial actions, increase
71 its reference count and release table lock.
73 Neighbour entries are protected:
74 - with reference count.
75 - with rwlock neigh->lock
77 Reference count prevents destruction.
79 neigh->lock mainly serializes ll address data and its validity state.
80 However, the same lock is used to protect other entry fields:
81 - timer
82 - resolution queue
84 Again, nothing clever shall be done under neigh->lock;
85 the most complicated procedure we allow is dev->hard_header.
86 dev->hard_header is assumed to be simple and to make no
87 callbacks into the neighbour tables.
89 The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
90 list of neighbour tables. This list is used only in process context.
91 */
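/* Editor's sketch (not part of the original file) of the pattern the
 * rules above prescribe: pin the entry with a reference while the table
 * lock is held, drop the lock, and only then do anything non-trivial.
 * "do_something_slow" is a hypothetical placeholder.
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <find entry in its hash bucket>;
 *	if (n)
 *		neigh_hold(n);		// refcount keeps it alive
 *	write_unlock_bh(&tbl->lock);
 *	if (n) {
 *		do_something_slow(n);	// may touch drivers/network now
 *		neigh_release(n);
 *	}
 */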
93 static DEFINE_RWLOCK(neigh_tbl_lock);
95 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
97 kfree_skb(skb);
98 return -ENETDOWN;
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
103 if (neigh->parms->neigh_cleanup)
104 neigh->parms->neigh_cleanup(neigh);
106 __neigh_notify(neigh, RTM_DELNEIGH, 0);
107 neigh_release(neigh);
110 /*
111 * It is a random distribution over the interval (1/2)*base...(3/2)*base.
112 * It corresponds to the default IPv6 settings and is not overridable,
113 * because it is a really reasonable choice.
114 */
116 unsigned long neigh_rand_reach_time(unsigned long base)
118 return base ? (net_random() % base) + (base >> 1) : 0;
120 EXPORT_SYMBOL(neigh_rand_reach_time);
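/* Editor's worked example: with base = 30 * HZ, net_random() % base is
 * uniform over [0, 30*HZ) and (base >> 1) adds 15*HZ, so the result is
 * uniform over [15*HZ, 45*HZ), i.e. (1/2)*base ... (3/2)*base as
 * documented above. A base of 0 simply yields 0.
 */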
123 static int neigh_forced_gc(struct neigh_table *tbl)
125 int shrunk = 0;
126 int i;
127 struct neigh_hash_table *nht;
129 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
131 write_lock_bh(&tbl->lock);
132 nht = rcu_dereference_protected(tbl->nht,
133 lockdep_is_held(&tbl->lock));
134 for (i = 0; i < (1 << nht->hash_shift); i++) {
135 struct neighbour *n;
136 struct neighbour __rcu **np;
138 np = &nht->hash_buckets[i];
139 while ((n = rcu_dereference_protected(*np,
140 lockdep_is_held(&tbl->lock))) != NULL) {
141 /* Neighbour record may be discarded if:
142 * - nobody refers to it.
143 * - it is not permanent
144 */
145 write_lock(&n->lock);
146 if (atomic_read(&n->refcnt) == 1 &&
147 !(n->nud_state & NUD_PERMANENT)) {
148 rcu_assign_pointer(*np,
149 rcu_dereference_protected(n->next,
150 lockdep_is_held(&tbl->lock)));
151 n->dead = 1;
152 shrunk = 1;
153 write_unlock(&n->lock);
154 neigh_cleanup_and_release(n);
155 continue;
157 write_unlock(&n->lock);
158 np = &n->next;
162 tbl->last_flush = jiffies;
164 write_unlock_bh(&tbl->lock);
166 return shrunk;
169 static void neigh_add_timer(struct neighbour *n, unsigned long when)
171 neigh_hold(n);
172 if (unlikely(mod_timer(&n->timer, when))) {
173 printk("NEIGH: BUG, double timer add, state is %x\n",
174 n->nud_state);
175 dump_stack();
179 static int neigh_del_timer(struct neighbour *n)
181 if ((n->nud_state & NUD_IN_TIMER) &&
182 del_timer(&n->timer)) {
183 neigh_release(n);
184 return 1;
186 return 0;
189 static void pneigh_queue_purge(struct sk_buff_head *list)
191 struct sk_buff *skb;
193 while ((skb = skb_dequeue(list)) != NULL) {
194 dev_put(skb->dev);
195 kfree_skb(skb);
199 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
201 int i;
202 struct neigh_hash_table *nht;
204 nht = rcu_dereference_protected(tbl->nht,
205 lockdep_is_held(&tbl->lock));
207 for (i = 0; i < (1 << nht->hash_shift); i++) {
208 struct neighbour *n;
209 struct neighbour __rcu **np = &nht->hash_buckets[i];
211 while ((n = rcu_dereference_protected(*np,
212 lockdep_is_held(&tbl->lock))) != NULL) {
213 if (dev && n->dev != dev) {
214 np = &n->next;
215 continue;
217 rcu_assign_pointer(*np,
218 rcu_dereference_protected(n->next,
219 lockdep_is_held(&tbl->lock)));
220 write_lock(&n->lock);
221 neigh_del_timer(n);
222 n->dead = 1;
224 if (atomic_read(&n->refcnt) != 1) {
225 /* The most unpleasant situation:
226 we must destroy the neighbour entry,
227 but someone still uses it.
229 The destroy will be delayed until
230 the last user releases us, but
231 we must kill timers etc. and move
232 it to a safe state.
233 */
234 skb_queue_purge(&n->arp_queue);
235 n->arp_queue_len_bytes = 0;
236 n->output = neigh_blackhole;
237 if (n->nud_state & NUD_VALID)
238 n->nud_state = NUD_NOARP;
239 else
240 n->nud_state = NUD_NONE;
241 neigh_dbg(2, "neigh %p is stray\n", n);
243 write_unlock(&n->lock);
244 neigh_cleanup_and_release(n);
249 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
251 write_lock_bh(&tbl->lock);
252 neigh_flush_dev(tbl, dev);
253 write_unlock_bh(&tbl->lock);
255 EXPORT_SYMBOL(neigh_changeaddr);
257 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
259 write_lock_bh(&tbl->lock);
260 neigh_flush_dev(tbl, dev);
261 pneigh_ifdown(tbl, dev);
262 write_unlock_bh(&tbl->lock);
264 del_timer_sync(&tbl->proxy_timer);
265 pneigh_queue_purge(&tbl->proxy_queue);
266 return 0;
268 EXPORT_SYMBOL(neigh_ifdown);
270 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
272 struct neighbour *n = NULL;
273 unsigned long now = jiffies;
274 int entries;
276 entries = atomic_inc_return(&tbl->entries) - 1;
277 if (entries >= tbl->gc_thresh3 ||
278 (entries >= tbl->gc_thresh2 &&
279 time_after(now, tbl->last_flush + 5 * HZ))) {
280 if (!neigh_forced_gc(tbl) &&
281 entries >= tbl->gc_thresh3)
282 goto out_entries;
285 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
286 if (!n)
287 goto out_entries;
289 skb_queue_head_init(&n->arp_queue);
290 rwlock_init(&n->lock);
291 seqlock_init(&n->ha_lock);
292 n->updated = n->used = now;
293 n->nud_state = NUD_NONE;
294 n->output = neigh_blackhole;
295 seqlock_init(&n->hh.hh_lock);
296 n->parms = neigh_parms_clone(&tbl->parms);
297 setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
299 NEIGH_CACHE_STAT_INC(tbl, allocs);
300 n->tbl = tbl;
301 atomic_set(&n->refcnt, 1);
302 n->dead = 1;
303 out:
304 return n;
306 out_entries:
307 atomic_dec(&tbl->entries);
308 goto out;
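/* Editor's note on the thresholds used in neigh_alloc(): gc_thresh3 is a
 * hard cap (the allocation fails once forced GC cannot shrink the table),
 * while gc_thresh2 only triggers a forced GC when the last flush was more
 * than 5 seconds ago. gc_thresh1, checked in neigh_periodic_work(), is
 * the size below which no periodic GC scan is done at all.
 */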
311 static void neigh_get_hash_rnd(u32 *x)
313 get_random_bytes(x, sizeof(*x));
314 *x |= 1;
317 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
319 size_t size = (1 << shift) * sizeof(struct neighbour *);
320 struct neigh_hash_table *ret;
321 struct neighbour __rcu **buckets;
322 int i;
324 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
325 if (!ret)
326 return NULL;
327 if (size <= PAGE_SIZE)
328 buckets = kzalloc(size, GFP_ATOMIC);
329 else
330 buckets = (struct neighbour __rcu **)
331 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
332 get_order(size));
333 if (!buckets) {
334 kfree(ret);
335 return NULL;
337 ret->hash_buckets = buckets;
338 ret->hash_shift = shift;
339 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
340 neigh_get_hash_rnd(&ret->hash_rnd[i]);
341 return ret;
344 static void neigh_hash_free_rcu(struct rcu_head *head)
346 struct neigh_hash_table *nht = container_of(head,
347 struct neigh_hash_table,
348 rcu);
349 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
350 struct neighbour __rcu **buckets = nht->hash_buckets;
352 if (size <= PAGE_SIZE)
353 kfree(buckets);
354 else
355 free_pages((unsigned long)buckets, get_order(size));
356 kfree(nht);
359 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
360 unsigned long new_shift)
362 unsigned int i, hash;
363 struct neigh_hash_table *new_nht, *old_nht;
365 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
367 old_nht = rcu_dereference_protected(tbl->nht,
368 lockdep_is_held(&tbl->lock));
369 new_nht = neigh_hash_alloc(new_shift);
370 if (!new_nht)
371 return old_nht;
373 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
374 struct neighbour *n, *next;
376 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
377 lockdep_is_held(&tbl->lock));
378 n != NULL;
379 n = next) {
380 hash = tbl->hash(n->primary_key, n->dev,
381 new_nht->hash_rnd);
383 hash >>= (32 - new_nht->hash_shift);
384 next = rcu_dereference_protected(n->next,
385 lockdep_is_held(&tbl->lock));
387 rcu_assign_pointer(n->next,
388 rcu_dereference_protected(
389 new_nht->hash_buckets[hash],
390 lockdep_is_held(&tbl->lock)));
391 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
395 rcu_assign_pointer(tbl->nht, new_nht);
396 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
397 return new_nht;
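/* Editor's note: the table is grown (see __neigh_create()) once the entry
 * count exceeds the bucket count, doubling the bucket array. Lockless
 * readers are never blocked: the new table is published with
 * rcu_assign_pointer() and the old one is freed only via call_rcu().
 */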
400 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
401 struct net_device *dev)
403 struct neighbour *n;
404 int key_len = tbl->key_len;
405 u32 hash_val;
406 struct neigh_hash_table *nht;
408 NEIGH_CACHE_STAT_INC(tbl, lookups);
410 rcu_read_lock_bh();
411 nht = rcu_dereference_bh(tbl->nht);
412 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
414 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
415 n != NULL;
416 n = rcu_dereference_bh(n->next)) {
417 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
418 if (!atomic_inc_not_zero(&n->refcnt))
419 n = NULL;
420 NEIGH_CACHE_STAT_INC(tbl, hits);
421 break;
425 rcu_read_unlock_bh();
426 return n;
428 EXPORT_SYMBOL(neigh_lookup);
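/* Editor's usage sketch (assuming the ARP table, arp_tbl, and an IPv4
 * key): neigh_lookup() returns the entry with a reference already taken,
 * so every successful lookup must be paired with neigh_release().
 *
 *	__be32 ip = ...;
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &ip, dev);
 *	if (n) {
 *		// inspect n->nud_state, snapshot n->ha, etc.
 *		neigh_release(n);
 *	}
 */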
430 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
431 const void *pkey)
433 struct neighbour *n;
434 int key_len = tbl->key_len;
435 u32 hash_val;
436 struct neigh_hash_table *nht;
438 NEIGH_CACHE_STAT_INC(tbl, lookups);
440 rcu_read_lock_bh();
441 nht = rcu_dereference_bh(tbl->nht);
442 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
444 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
445 n != NULL;
446 n = rcu_dereference_bh(n->next)) {
447 if (!memcmp(n->primary_key, pkey, key_len) &&
448 net_eq(dev_net(n->dev), net)) {
449 if (!atomic_inc_not_zero(&n->refcnt))
450 n = NULL;
451 NEIGH_CACHE_STAT_INC(tbl, hits);
452 break;
456 rcu_read_unlock_bh();
457 return n;
459 EXPORT_SYMBOL(neigh_lookup_nodev);
461 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
462 struct net_device *dev, bool want_ref)
464 u32 hash_val;
465 int key_len = tbl->key_len;
466 int error;
467 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
468 struct neigh_hash_table *nht;
470 if (!n) {
471 rc = ERR_PTR(-ENOBUFS);
472 goto out;
475 memcpy(n->primary_key, pkey, key_len);
476 n->dev = dev;
477 dev_hold(dev);
479 /* Protocol specific setup. */
480 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
481 rc = ERR_PTR(error);
482 goto out_neigh_release;
485 if (dev->netdev_ops->ndo_neigh_construct) {
486 error = dev->netdev_ops->ndo_neigh_construct(n);
487 if (error < 0) {
488 rc = ERR_PTR(error);
489 goto out_neigh_release;
493 /* Device specific setup. */
494 if (n->parms->neigh_setup &&
495 (error = n->parms->neigh_setup(n)) < 0) {
496 rc = ERR_PTR(error);
497 goto out_neigh_release;
500 n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
502 write_lock_bh(&tbl->lock);
503 nht = rcu_dereference_protected(tbl->nht,
504 lockdep_is_held(&tbl->lock));
506 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
507 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
509 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
511 if (n->parms->dead) {
512 rc = ERR_PTR(-EINVAL);
513 goto out_tbl_unlock;
516 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
517 lockdep_is_held(&tbl->lock));
518 n1 != NULL;
519 n1 = rcu_dereference_protected(n1->next,
520 lockdep_is_held(&tbl->lock))) {
521 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
522 if (want_ref)
523 neigh_hold(n1);
524 rc = n1;
525 goto out_tbl_unlock;
529 n->dead = 0;
530 if (want_ref)
531 neigh_hold(n);
532 rcu_assign_pointer(n->next,
533 rcu_dereference_protected(nht->hash_buckets[hash_val],
534 lockdep_is_held(&tbl->lock)));
535 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
536 write_unlock_bh(&tbl->lock);
537 neigh_dbg(2, "neigh %p is created\n", n);
538 rc = n;
539 out:
540 return rc;
541 out_tbl_unlock:
542 write_unlock_bh(&tbl->lock);
543 out_neigh_release:
544 neigh_release(n);
545 goto out;
547 EXPORT_SYMBOL(__neigh_create);
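/* Editor's note: most callers reach __neigh_create() through the inline
 * wrappers in <net/neighbour.h>; in this kernel neigh_create(tbl, pkey,
 * dev) is simply __neigh_create(tbl, pkey, dev, true), so it returns the
 * (possibly pre-existing) entry with a reference held for the caller.
 */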
549 static u32 pneigh_hash(const void *pkey, int key_len)
551 u32 hash_val = *(u32 *)(pkey + key_len - 4);
552 hash_val ^= (hash_val >> 16);
553 hash_val ^= hash_val >> 8;
554 hash_val ^= hash_val >> 4;
555 hash_val &= PNEIGH_HASHMASK;
556 return hash_val;
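/* Editor's note: the xor-shift folding above mixes all 32 bits of the
 * key's last four bytes down into the low nibble; masking with
 * PNEIGH_HASHMASK (0xF) then picks one of the 16 proxy hash buckets.
 */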
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560 struct net *net,
561 const void *pkey,
562 int key_len,
563 struct net_device *dev)
565 while (n) {
566 if (!memcmp(n->key, pkey, key_len) &&
567 net_eq(pneigh_net(n), net) &&
568 (n->dev == dev || !n->dev))
569 return n;
570 n = n->next;
572 return NULL;
575 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
576 struct net *net, const void *pkey, struct net_device *dev)
578 int key_len = tbl->key_len;
579 u32 hash_val = pneigh_hash(pkey, key_len);
581 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
582 net, pkey, key_len, dev);
584 EXPORT_SYMBOL_GPL(__pneigh_lookup);
586 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
587 struct net *net, const void *pkey,
588 struct net_device *dev, int creat)
590 struct pneigh_entry *n;
591 int key_len = tbl->key_len;
592 u32 hash_val = pneigh_hash(pkey, key_len);
594 read_lock_bh(&tbl->lock);
595 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
596 net, pkey, key_len, dev);
597 read_unlock_bh(&tbl->lock);
599 if (n || !creat)
600 goto out;
602 ASSERT_RTNL();
604 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
605 if (!n)
606 goto out;
608 write_pnet(&n->net, hold_net(net));
609 memcpy(n->key, pkey, key_len);
610 n->dev = dev;
611 if (dev)
612 dev_hold(dev);
614 if (tbl->pconstructor && tbl->pconstructor(n)) {
615 if (dev)
616 dev_put(dev);
617 release_net(net);
618 kfree(n);
619 n = NULL;
620 goto out;
623 write_lock_bh(&tbl->lock);
624 n->next = tbl->phash_buckets[hash_val];
625 tbl->phash_buckets[hash_val] = n;
626 write_unlock_bh(&tbl->lock);
627 out:
628 return n;
630 EXPORT_SYMBOL(pneigh_lookup);
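/* Editor's usage sketch (hypothetical caller, assuming arp_tbl): with
 * creat == 1 the RTNL lock must be held, since a missing entry is
 * allocated with GFP_KERNEL and linked into the proxy hash:
 *
 *	ASSERT_RTNL();
 *	pn = pneigh_lookup(&arp_tbl, net, &ip, dev, 1);
 *	if (!pn)
 *		return -ENOBUFS;
 *	pn->flags = ndm_flags;
 *
 * With creat == 0 this is a pure lookup and never sleeps; compare the
 * NTF_PROXY branch of neigh_add() below.
 */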
633 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
634 struct net_device *dev)
636 struct pneigh_entry *n, **np;
637 int key_len = tbl->key_len;
638 u32 hash_val = pneigh_hash(pkey, key_len);
640 write_lock_bh(&tbl->lock);
641 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
642 np = &n->next) {
643 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
644 net_eq(pneigh_net(n), net)) {
645 *np = n->next;
646 write_unlock_bh(&tbl->lock);
647 if (tbl->pdestructor)
648 tbl->pdestructor(n);
649 if (n->dev)
650 dev_put(n->dev);
651 release_net(pneigh_net(n));
652 kfree(n);
653 return 0;
656 write_unlock_bh(&tbl->lock);
657 return -ENOENT;
660 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
662 struct pneigh_entry *n, **np;
663 u32 h;
665 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
666 np = &tbl->phash_buckets[h];
667 while ((n = *np) != NULL) {
668 if (!dev || n->dev == dev) {
669 *np = n->next;
670 if (tbl->pdestructor)
671 tbl->pdestructor(n);
672 if (n->dev)
673 dev_put(n->dev);
674 release_net(pneigh_net(n));
675 kfree(n);
676 continue;
678 np = &n->next;
681 return -ENOENT;
684 static void neigh_parms_destroy(struct neigh_parms *parms);
686 static inline void neigh_parms_put(struct neigh_parms *parms)
688 if (atomic_dec_and_test(&parms->refcnt))
689 neigh_parms_destroy(parms);
692 /*
693 * neighbour must already be out of the table;
695 */
696 void neigh_destroy(struct neighbour *neigh)
698 struct net_device *dev = neigh->dev;
700 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
702 if (!neigh->dead) {
703 pr_warn("Destroying alive neighbour %p\n", neigh);
704 dump_stack();
705 return;
708 if (neigh_del_timer(neigh))
709 pr_warn("Impossible event\n");
711 skb_queue_purge(&neigh->arp_queue);
712 neigh->arp_queue_len_bytes = 0;
714 if (dev->netdev_ops->ndo_neigh_destroy)
715 dev->netdev_ops->ndo_neigh_destroy(neigh);
717 dev_put(dev);
718 neigh_parms_put(neigh->parms);
720 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
722 atomic_dec(&neigh->tbl->entries);
723 kfree_rcu(neigh, rcu);
725 EXPORT_SYMBOL(neigh_destroy);
727 /* Neighbour state is suspicious;
728 disable fast path.
730 Called with write_locked neigh.
731 */
732 static void neigh_suspect(struct neighbour *neigh)
734 neigh_dbg(2, "neigh %p is suspected\n", neigh);
736 neigh->output = neigh->ops->output;
739 /* Neighbour state is OK;
740 enable fast path.
742 Called with write_locked neigh.
743 */
744 static void neigh_connect(struct neighbour *neigh)
746 neigh_dbg(2, "neigh %p is connected\n", neigh);
748 neigh->output = neigh->ops->connected_output;
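/* Editor's note: neigh_suspect()/neigh_connect() implement the fast-path
 * switch described in the comments above. A connected entry transmits via
 * ops->connected_output (e.g. neigh_connected_output() below, which just
 * copies the cached link-layer header), while a suspect entry falls back
 * to ops->output (typically neigh_resolve_output(), which re-validates
 * neighbour state for every packet).
 */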
751 static void neigh_periodic_work(struct work_struct *work)
753 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
754 struct neighbour *n;
755 struct neighbour __rcu **np;
756 unsigned int i;
757 struct neigh_hash_table *nht;
759 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
761 write_lock_bh(&tbl->lock);
762 nht = rcu_dereference_protected(tbl->nht,
763 lockdep_is_held(&tbl->lock));
765 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
766 goto out;
768 /*
769 * periodically recompute ReachableTime from random function
770 */
772 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
773 struct neigh_parms *p;
774 tbl->last_rand = jiffies;
775 for (p = &tbl->parms; p; p = p->next)
776 p->reachable_time =
777 neigh_rand_reach_time(p->base_reachable_time);
780 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
781 np = &nht->hash_buckets[i];
783 while ((n = rcu_dereference_protected(*np,
784 lockdep_is_held(&tbl->lock))) != NULL) {
785 unsigned int state;
787 write_lock(&n->lock);
789 state = n->nud_state;
790 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
791 write_unlock(&n->lock);
792 goto next_elt;
795 if (time_before(n->used, n->confirmed))
796 n->used = n->confirmed;
798 if (atomic_read(&n->refcnt) == 1 &&
799 (state == NUD_FAILED ||
800 time_after(jiffies, n->used + n->parms->gc_staletime))) {
801 *np = n->next;
802 n->dead = 1;
803 write_unlock(&n->lock);
804 neigh_cleanup_and_release(n);
805 continue;
807 write_unlock(&n->lock);
809 next_elt:
810 np = &n->next;
812 /*
813 * It's fine to release lock here, even if hash table
814 * grows while we are preempted.
815 */
816 write_unlock_bh(&tbl->lock);
817 cond_resched();
818 write_lock_bh(&tbl->lock);
819 nht = rcu_dereference_protected(tbl->nht,
820 lockdep_is_held(&tbl->lock));
822 out:
823 /* Cycle through all hash buckets every base_reachable_time/2 ticks.
824 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
825 * base_reachable_time.
826 */
827 schedule_delayed_work(&tbl->gc_work,
828 tbl->parms.base_reachable_time >> 1);
829 write_unlock_bh(&tbl->lock);
832 static __inline__ int neigh_max_probes(struct neighbour *n)
834 struct neigh_parms *p = n->parms;
835 return (n->nud_state & NUD_PROBE) ?
836 p->ucast_probes :
837 p->ucast_probes + p->app_probes + p->mcast_probes;
840 static void neigh_invalidate(struct neighbour *neigh)
841 __releases(neigh->lock)
842 __acquires(neigh->lock)
844 struct sk_buff *skb;
846 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
847 neigh_dbg(2, "neigh %p is failed\n", neigh);
848 neigh->updated = jiffies;
850 /* This is a very delicate place. report_unreachable is a complicated
851 routine and, in particular, it can hit this same neighbour entry!
853 So we try to be careful and avoid an endless loop. --ANK
854 */
855 while (neigh->nud_state == NUD_FAILED &&
856 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
857 write_unlock(&neigh->lock);
858 neigh->ops->error_report(neigh, skb);
859 write_lock(&neigh->lock);
861 skb_queue_purge(&neigh->arp_queue);
862 neigh->arp_queue_len_bytes = 0;
865 static void neigh_probe(struct neighbour *neigh)
866 __releases(neigh->lock)
868 struct sk_buff *skb = skb_peek(&neigh->arp_queue);
869 /* keep skb alive even if arp_queue overflows */
870 if (skb)
871 skb = skb_copy(skb, GFP_ATOMIC);
872 write_unlock(&neigh->lock);
873 neigh->ops->solicit(neigh, skb);
874 atomic_inc(&neigh->probes);
875 kfree_skb(skb);
878 /* Called when a timer expires for a neighbour entry. */
880 static void neigh_timer_handler(unsigned long arg)
882 unsigned long now, next;
883 struct neighbour *neigh = (struct neighbour *)arg;
884 unsigned int state;
885 int notify = 0;
887 write_lock(&neigh->lock);
889 state = neigh->nud_state;
890 now = jiffies;
891 next = now + HZ;
893 if (!(state & NUD_IN_TIMER))
894 goto out;
896 if (state & NUD_REACHABLE) {
897 if (time_before_eq(now,
898 neigh->confirmed + neigh->parms->reachable_time)) {
899 neigh_dbg(2, "neigh %p is still alive\n", neigh);
900 next = neigh->confirmed + neigh->parms->reachable_time;
901 } else if (time_before_eq(now,
902 neigh->used + neigh->parms->delay_probe_time)) {
903 neigh_dbg(2, "neigh %p is delayed\n", neigh);
904 neigh->nud_state = NUD_DELAY;
905 neigh->updated = jiffies;
906 neigh_suspect(neigh);
907 next = now + neigh->parms->delay_probe_time;
908 } else {
909 neigh_dbg(2, "neigh %p is suspected\n", neigh);
910 neigh->nud_state = NUD_STALE;
911 neigh->updated = jiffies;
912 neigh_suspect(neigh);
913 notify = 1;
915 } else if (state & NUD_DELAY) {
916 if (time_before_eq(now,
917 neigh->confirmed + neigh->parms->delay_probe_time)) {
918 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
919 neigh->nud_state = NUD_REACHABLE;
920 neigh->updated = jiffies;
921 neigh_connect(neigh);
922 notify = 1;
923 next = neigh->confirmed + neigh->parms->reachable_time;
924 } else {
925 neigh_dbg(2, "neigh %p is probed\n", neigh);
926 neigh->nud_state = NUD_PROBE;
927 neigh->updated = jiffies;
928 atomic_set(&neigh->probes, 0);
929 next = now + neigh->parms->retrans_time;
931 } else {
932 /* NUD_PROBE|NUD_INCOMPLETE */
933 next = now + neigh->parms->retrans_time;
936 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
937 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
938 neigh->nud_state = NUD_FAILED;
939 notify = 1;
940 neigh_invalidate(neigh);
943 if (neigh->nud_state & NUD_IN_TIMER) {
944 if (time_before(next, jiffies + HZ/2))
945 next = jiffies + HZ/2;
946 if (!mod_timer(&neigh->timer, next))
947 neigh_hold(neigh);
949 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
950 neigh_probe(neigh);
951 } else {
952 out:
953 write_unlock(&neigh->lock);
956 if (notify)
957 neigh_update_notify(neigh);
959 neigh_release(neigh);
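/* Editor's summary of the transitions the handler above drives (standard
 * NUD state machine):
 *
 *	NUD_REACHABLE: confirmed recently -> stay; used recently -> DELAY;
 *	               otherwise -> STALE
 *	NUD_DELAY:     confirmed in time -> REACHABLE; else -> PROBE
 *	NUD_PROBE / NUD_INCOMPLETE: probes exhausted -> FAILED
 *
 * Note the timer is never re-armed sooner than jiffies + HZ/2, which
 * bounds the per-entry timer rate.
 */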
962 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
964 int rc;
965 bool immediate_probe = false;
967 write_lock_bh(&neigh->lock);
969 rc = 0;
970 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
971 goto out_unlock_bh;
973 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
974 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
975 unsigned long next, now = jiffies;
977 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
978 neigh->nud_state = NUD_INCOMPLETE;
979 neigh->updated = now;
980 next = now + max(neigh->parms->retrans_time, HZ/2);
981 neigh_add_timer(neigh, next);
982 immediate_probe = true;
983 } else {
984 neigh->nud_state = NUD_FAILED;
985 neigh->updated = jiffies;
986 write_unlock_bh(&neigh->lock);
988 kfree_skb(skb);
989 return 1;
991 } else if (neigh->nud_state & NUD_STALE) {
992 neigh_dbg(2, "neigh %p is delayed\n", neigh);
993 neigh->nud_state = NUD_DELAY;
994 neigh->updated = jiffies;
995 neigh_add_timer(neigh,
996 jiffies + neigh->parms->delay_probe_time);
999 if (neigh->nud_state == NUD_INCOMPLETE) {
1000 if (skb) {
1001 while (neigh->arp_queue_len_bytes + skb->truesize >
1002 neigh->parms->queue_len_bytes) {
1003 struct sk_buff *buff;
1005 buff = __skb_dequeue(&neigh->arp_queue);
1006 if (!buff)
1007 break;
1008 neigh->arp_queue_len_bytes -= buff->truesize;
1009 kfree_skb(buff);
1010 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1012 skb_dst_force(skb);
1013 __skb_queue_tail(&neigh->arp_queue, skb);
1014 neigh->arp_queue_len_bytes += skb->truesize;
1016 rc = 1;
1018 out_unlock_bh:
1019 if (immediate_probe)
1020 neigh_probe(neigh);
1021 else
1022 write_unlock(&neigh->lock);
1023 local_bh_enable();
1024 return rc;
1026 EXPORT_SYMBOL(__neigh_event_send);
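/* Editor's note: the queueing above implements per-neighbour
 * backpressure; once arp_queue_len_bytes would exceed
 * parms->queue_len_bytes, the oldest queued skbs are dropped (and counted
 * as unres_discards) to make room. Callers normally use the
 * neigh_event_send() wrapper, which returns 0 immediately when the entry
 * is already connected, delayed, or probing.
 */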
1028 static void neigh_update_hhs(struct neighbour *neigh)
1030 struct hh_cache *hh;
1031 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1032 = NULL;
1034 if (neigh->dev->header_ops)
1035 update = neigh->dev->header_ops->cache_update;
1037 if (update) {
1038 hh = &neigh->hh;
1039 if (hh->hh_len) {
1040 write_seqlock_bh(&hh->hh_lock);
1041 update(hh, neigh->dev, neigh->ha);
1042 write_sequnlock_bh(&hh->hh_lock);
1049 /* Generic update routine.
1050 -- lladdr is the new lladdr, or NULL if it is not supplied.
1051 -- new is the new state.
1052 -- flags
1053 NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1054 if it is different.
1055 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
1056 lladdr instead of overriding it
1057 if it is different.
1058 It also allows retaining the current state
1059 if lladdr is unchanged.
1060 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1062 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1063 NTF_ROUTER flag.
1064 NEIGH_UPDATE_F_ISROUTER indicates that the neighbour is known to be
1065 a router.
1067 Caller MUST hold a reference count on the entry.
1068 */
1070 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1071 u32 flags)
1073 u8 old;
1074 int err;
1075 int notify = 0;
1076 struct net_device *dev;
1077 int update_isrouter = 0;
1079 write_lock_bh(&neigh->lock);
1081 dev = neigh->dev;
1082 old = neigh->nud_state;
1083 err = -EPERM;
1085 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1086 (old & (NUD_NOARP | NUD_PERMANENT)))
1087 goto out;
1089 if (!(new & NUD_VALID)) {
1090 neigh_del_timer(neigh);
1091 if (old & NUD_CONNECTED)
1092 neigh_suspect(neigh);
1093 neigh->nud_state = new;
1094 err = 0;
1095 notify = old & NUD_VALID;
1096 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1097 (new & NUD_FAILED)) {
1098 neigh_invalidate(neigh);
1099 notify = 1;
1101 goto out;
1104 /* Compare new lladdr with cached one */
1105 if (!dev->addr_len) {
1106 /* First case: device needs no address. */
1107 lladdr = neigh->ha;
1108 } else if (lladdr) {
1109 /* The second case: if something is already cached
1110 and a new address is proposed:
1111 - compare new & old
1112 - if they are different, check override flag
1113 */
1114 if ((old & NUD_VALID) &&
1115 !memcmp(lladdr, neigh->ha, dev->addr_len))
1116 lladdr = neigh->ha;
1117 } else {
1118 /* No address is supplied; if we know something,
1119 use it, otherwise discard the request.
1120 */
1121 err = -EINVAL;
1122 if (!(old & NUD_VALID))
1123 goto out;
1124 lladdr = neigh->ha;
1127 if (new & NUD_CONNECTED)
1128 neigh->confirmed = jiffies;
1129 neigh->updated = jiffies;
1131 /* If the entry was valid and the address has not changed,
1132 do not change the entry state if the new one is STALE.
1133 */
1134 err = 0;
1135 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1136 if (old & NUD_VALID) {
1137 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1138 update_isrouter = 0;
1139 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1140 (old & NUD_CONNECTED)) {
1141 lladdr = neigh->ha;
1142 new = NUD_STALE;
1143 } else
1144 goto out;
1145 } else {
1146 if (lladdr == neigh->ha && new == NUD_STALE &&
1147 ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1148 (old & NUD_CONNECTED))
1150 new = old;
1154 if (new != old) {
1155 neigh_del_timer(neigh);
1156 if (new & NUD_IN_TIMER)
1157 neigh_add_timer(neigh, (jiffies +
1158 ((new & NUD_REACHABLE) ?
1159 neigh->parms->reachable_time :
1160 0)));
1161 neigh->nud_state = new;
1164 if (lladdr != neigh->ha) {
1165 write_seqlock(&neigh->ha_lock);
1166 memcpy(&neigh->ha, lladdr, dev->addr_len);
1167 write_sequnlock(&neigh->ha_lock);
1168 neigh_update_hhs(neigh);
1169 if (!(new & NUD_CONNECTED))
1170 neigh->confirmed = jiffies -
1171 (neigh->parms->base_reachable_time << 1);
1172 notify = 1;
1174 if (new == old)
1175 goto out;
1176 if (new & NUD_CONNECTED)
1177 neigh_connect(neigh);
1178 else
1179 neigh_suspect(neigh);
1180 if (!(old & NUD_VALID)) {
1181 struct sk_buff *skb;
1183 /* Again: avoid dead loop if something went wrong */
1185 while (neigh->nud_state & NUD_VALID &&
1186 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1187 struct dst_entry *dst = skb_dst(skb);
1188 struct neighbour *n2, *n1 = neigh;
1189 write_unlock_bh(&neigh->lock);
1191 rcu_read_lock();
1193 /* Why not just use 'neigh' as-is? The problem is that
1194 * things such as shaper, eql, and sch_teql can end up
1195 * using alternative, different, neigh objects to output
1196 * the packet in the output path. So what we need to do
1197 * here is re-lookup the top-level neigh in the path so
1198 * we can reinject the packet there.
1199 */
1200 n2 = NULL;
1201 if (dst) {
1202 n2 = dst_neigh_lookup_skb(dst, skb);
1203 if (n2)
1204 n1 = n2;
1206 n1->output(n1, skb);
1207 if (n2)
1208 neigh_release(n2);
1209 rcu_read_unlock();
1211 write_lock_bh(&neigh->lock);
1213 skb_queue_purge(&neigh->arp_queue);
1214 neigh->arp_queue_len_bytes = 0;
1216 out:
1217 if (update_isrouter) {
1218 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1219 (neigh->flags | NTF_ROUTER) :
1220 (neigh->flags & ~NTF_ROUTER);
1222 write_unlock_bh(&neigh->lock);
1224 if (notify)
1225 neigh_update_notify(neigh);
1227 return err;
1229 EXPORT_SYMBOL(neigh_update);
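/* Editor's usage sketch: a typical administrative transition, matching
 * what neigh_delete() below does. The caller holds a reference across the
 * call, as the comment above requires:
 *
 *	err = neigh_update(neigh, NULL, NUD_FAILED,
 *			   NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
 *	neigh_release(neigh);
 */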
1231 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1232 u8 *lladdr, void *saddr,
1233 struct net_device *dev)
1235 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1236 lladdr || !dev->addr_len);
1237 if (neigh)
1238 neigh_update(neigh, lladdr, NUD_STALE,
1239 NEIGH_UPDATE_F_OVERRIDE);
1240 return neigh;
1242 EXPORT_SYMBOL(neigh_event_ns);
1244 /* called with read_lock_bh(&n->lock); */
1245 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1247 struct net_device *dev = dst->dev;
1248 __be16 prot = dst->ops->protocol;
1249 struct hh_cache *hh = &n->hh;
1251 write_lock_bh(&n->lock);
1253 /* Only one thread can come in here and initialize the
1254 * hh_cache entry.
1255 */
1256 if (!hh->hh_len)
1257 dev->header_ops->cache(n, hh, prot);
1259 write_unlock_bh(&n->lock);
1262 /* This function can be used in contexts where only the old
1263 * dev_queue_xmit worked, e.g. if you want to override the normal
1264 * output path (eql, shaper), but resolution is not made yet.
1265 */
1267 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1269 struct net_device *dev = skb->dev;
1271 __skb_pull(skb, skb_network_offset(skb));
1273 if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1274 skb->len) < 0 &&
1275 dev->header_ops->rebuild(skb))
1276 return 0;
1278 return dev_queue_xmit(skb);
1280 EXPORT_SYMBOL(neigh_compat_output);
1282 /* Slow and careful. */
1284 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1286 struct dst_entry *dst = skb_dst(skb);
1287 int rc = 0;
1289 if (!dst)
1290 goto discard;
1292 if (!neigh_event_send(neigh, skb)) {
1293 int err;
1294 struct net_device *dev = neigh->dev;
1295 unsigned int seq;
1297 if (dev->header_ops->cache && !neigh->hh.hh_len)
1298 neigh_hh_init(neigh, dst);
1300 do {
1301 __skb_pull(skb, skb_network_offset(skb));
1302 seq = read_seqbegin(&neigh->ha_lock);
1303 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1304 neigh->ha, NULL, skb->len);
1305 } while (read_seqretry(&neigh->ha_lock, seq));
1307 if (err >= 0)
1308 rc = dev_queue_xmit(skb);
1309 else
1310 goto out_kfree_skb;
1312 out:
1313 return rc;
1314 discard:
1315 neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1316 out_kfree_skb:
1317 rc = -EINVAL;
1318 kfree_skb(skb);
1319 goto out;
1321 EXPORT_SYMBOL(neigh_resolve_output);
1323 /* As fast as possible without hh cache */
1325 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1327 struct net_device *dev = neigh->dev;
1328 unsigned int seq;
1329 int err;
1331 do {
1332 __skb_pull(skb, skb_network_offset(skb));
1333 seq = read_seqbegin(&neigh->ha_lock);
1334 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1335 neigh->ha, NULL, skb->len);
1336 } while (read_seqretry(&neigh->ha_lock, seq));
1338 if (err >= 0)
1339 err = dev_queue_xmit(skb);
1340 else {
1341 err = -EINVAL;
1342 kfree_skb(skb);
1344 return err;
1346 EXPORT_SYMBOL(neigh_connected_output);
1348 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1350 return dev_queue_xmit(skb);
1352 EXPORT_SYMBOL(neigh_direct_output);
1354 static void neigh_proxy_process(unsigned long arg)
1356 struct neigh_table *tbl = (struct neigh_table *)arg;
1357 long sched_next = 0;
1358 unsigned long now = jiffies;
1359 struct sk_buff *skb, *n;
1361 spin_lock(&tbl->proxy_queue.lock);
1363 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1364 long tdif = NEIGH_CB(skb)->sched_next - now;
1366 if (tdif <= 0) {
1367 struct net_device *dev = skb->dev;
1369 __skb_unlink(skb, &tbl->proxy_queue);
1370 if (tbl->proxy_redo && netif_running(dev)) {
1371 rcu_read_lock();
1372 tbl->proxy_redo(skb);
1373 rcu_read_unlock();
1374 } else {
1375 kfree_skb(skb);
1378 dev_put(dev);
1379 } else if (!sched_next || tdif < sched_next)
1380 sched_next = tdif;
1382 del_timer(&tbl->proxy_timer);
1383 if (sched_next)
1384 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1385 spin_unlock(&tbl->proxy_queue.lock);
1388 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1389 struct sk_buff *skb)
1391 unsigned long now = jiffies;
1392 unsigned long sched_next = now + (net_random() % p->proxy_delay);
1394 if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1395 kfree_skb(skb);
1396 return;
1399 NEIGH_CB(skb)->sched_next = sched_next;
1400 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1402 spin_lock(&tbl->proxy_queue.lock);
1403 if (del_timer(&tbl->proxy_timer)) {
1404 if (time_before(tbl->proxy_timer.expires, sched_next))
1405 sched_next = tbl->proxy_timer.expires;
1407 skb_dst_drop(skb);
1408 dev_hold(skb->dev);
1409 __skb_queue_tail(&tbl->proxy_queue, skb);
1410 mod_timer(&tbl->proxy_timer, sched_next);
1411 spin_unlock(&tbl->proxy_queue.lock);
1413 EXPORT_SYMBOL(pneigh_enqueue);
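/* Editor's note: each proxied packet is delayed by a uniform random
 * amount in [0, proxy_delay) jiffies, and the rescheduling above keeps
 * the earlier of the pending and the new expiry, so the proxy_timer only
 * ever moves earlier.
 */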
1415 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1416 struct net *net, int ifindex)
1418 struct neigh_parms *p;
1420 for (p = &tbl->parms; p; p = p->next) {
1421 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1422 (!p->dev && !ifindex))
1423 return p;
1426 return NULL;
1429 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1430 struct neigh_table *tbl)
1432 struct neigh_parms *p, *ref;
1433 struct net *net = dev_net(dev);
1434 const struct net_device_ops *ops = dev->netdev_ops;
1436 ref = lookup_neigh_parms(tbl, net, 0);
1437 if (!ref)
1438 return NULL;
1440 p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1441 if (p) {
1442 p->tbl = tbl;
1443 atomic_set(&p->refcnt, 1);
1444 p->reachable_time =
1445 neigh_rand_reach_time(p->base_reachable_time);
1447 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1448 kfree(p);
1449 return NULL;
1452 dev_hold(dev);
1453 p->dev = dev;
1454 write_pnet(&p->net, hold_net(net));
1455 p->sysctl_table = NULL;
1456 write_lock_bh(&tbl->lock);
1457 p->next = tbl->parms.next;
1458 tbl->parms.next = p;
1459 write_unlock_bh(&tbl->lock);
1461 return p;
1463 EXPORT_SYMBOL(neigh_parms_alloc);
1465 static void neigh_rcu_free_parms(struct rcu_head *head)
1467 struct neigh_parms *parms =
1468 container_of(head, struct neigh_parms, rcu_head);
1470 neigh_parms_put(parms);
1473 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1475 struct neigh_parms **p;
1477 if (!parms || parms == &tbl->parms)
1478 return;
1479 write_lock_bh(&tbl->lock);
1480 for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1481 if (*p == parms) {
1482 *p = parms->next;
1483 parms->dead = 1;
1484 write_unlock_bh(&tbl->lock);
1485 if (parms->dev)
1486 dev_put(parms->dev);
1487 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1488 return;
1491 write_unlock_bh(&tbl->lock);
1492 neigh_dbg(1, "%s: not found\n", __func__);
1494 EXPORT_SYMBOL(neigh_parms_release);
1496 static void neigh_parms_destroy(struct neigh_parms *parms)
1498 release_net(neigh_parms_net(parms));
1499 kfree(parms);
1502 static struct lock_class_key neigh_table_proxy_queue_class;
1504 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1506 unsigned long now = jiffies;
1507 unsigned long phsize;
1509 write_pnet(&tbl->parms.net, &init_net);
1510 atomic_set(&tbl->parms.refcnt, 1);
1511 tbl->parms.reachable_time =
1512 neigh_rand_reach_time(tbl->parms.base_reachable_time);
1514 tbl->stats = alloc_percpu(struct neigh_statistics);
1515 if (!tbl->stats)
1516 panic("cannot create neighbour cache statistics");
1518 #ifdef CONFIG_PROC_FS
1519 if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1520 &neigh_stat_seq_fops, tbl))
1521 panic("cannot create neighbour proc dir entry");
1522 #endif
1524 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1526 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1527 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1529 if (!tbl->nht || !tbl->phash_buckets)
1530 panic("cannot allocate neighbour cache hashes");
1532 if (!tbl->entry_size)
1533 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1534 tbl->key_len, NEIGH_PRIV_ALIGN);
1535 else
1536 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1538 rwlock_init(&tbl->lock);
1539 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1540 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1541 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1542 skb_queue_head_init_class(&tbl->proxy_queue,
1543 &neigh_table_proxy_queue_class);
1545 tbl->last_flush = now;
1546 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1549 void neigh_table_init(struct neigh_table *tbl)
1551 struct neigh_table *tmp;
1553 neigh_table_init_no_netlink(tbl);
1554 write_lock(&neigh_tbl_lock);
1555 for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1556 if (tmp->family == tbl->family)
1557 break;
1559 tbl->next = neigh_tables;
1560 neigh_tables = tbl;
1561 write_unlock(&neigh_tbl_lock);
1563 if (unlikely(tmp)) {
1564 pr_err("Registering multiple tables for family %d\n",
1565 tbl->family);
1566 dump_stack();
1569 EXPORT_SYMBOL(neigh_table_init);
1571 int neigh_table_clear(struct neigh_table *tbl)
1573 struct neigh_table **tp;
1575 /* It is not clean... Fix it to unload IPv6 module safely */
1576 cancel_delayed_work_sync(&tbl->gc_work);
1577 del_timer_sync(&tbl->proxy_timer);
1578 pneigh_queue_purge(&tbl->proxy_queue);
1579 neigh_ifdown(tbl, NULL);
1580 if (atomic_read(&tbl->entries))
1581 pr_crit("neighbour leakage\n");
1582 write_lock(&neigh_tbl_lock);
1583 for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1584 if (*tp == tbl) {
1585 *tp = tbl->next;
1586 break;
1589 write_unlock(&neigh_tbl_lock);
1591 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1592 neigh_hash_free_rcu);
1593 tbl->nht = NULL;
1595 kfree(tbl->phash_buckets);
1596 tbl->phash_buckets = NULL;
1598 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1600 free_percpu(tbl->stats);
1601 tbl->stats = NULL;
1603 return 0;
1605 EXPORT_SYMBOL(neigh_table_clear);
1607 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1609 struct net *net = sock_net(skb->sk);
1610 struct ndmsg *ndm;
1611 struct nlattr *dst_attr;
1612 struct neigh_table *tbl;
1613 struct net_device *dev = NULL;
1614 int err = -EINVAL;
1616 ASSERT_RTNL();
1617 if (nlmsg_len(nlh) < sizeof(*ndm))
1618 goto out;
1620 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1621 if (dst_attr == NULL)
1622 goto out;
1624 ndm = nlmsg_data(nlh);
1625 if (ndm->ndm_ifindex) {
1626 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1627 if (dev == NULL) {
1628 err = -ENODEV;
1629 goto out;
1633 read_lock(&neigh_tbl_lock);
1634 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1635 struct neighbour *neigh;
1637 if (tbl->family != ndm->ndm_family)
1638 continue;
1639 read_unlock(&neigh_tbl_lock);
1641 if (nla_len(dst_attr) < tbl->key_len)
1642 goto out;
1644 if (ndm->ndm_flags & NTF_PROXY) {
1645 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1646 goto out;
1649 if (dev == NULL)
1650 goto out;
1652 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1653 if (neigh == NULL) {
1654 err = -ENOENT;
1655 goto out;
1658 err = neigh_update(neigh, NULL, NUD_FAILED,
1659 NEIGH_UPDATE_F_OVERRIDE |
1660 NEIGH_UPDATE_F_ADMIN);
1661 neigh_release(neigh);
1662 goto out;
1664 read_unlock(&neigh_tbl_lock);
1665 err = -EAFNOSUPPORT;
1667 out:
1668 return err;
1671 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1673 struct net *net = sock_net(skb->sk);
1674 struct ndmsg *ndm;
1675 struct nlattr *tb[NDA_MAX+1];
1676 struct neigh_table *tbl;
1677 struct net_device *dev = NULL;
1678 int err;
1680 ASSERT_RTNL();
1681 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1682 if (err < 0)
1683 goto out;
1685 err = -EINVAL;
1686 if (tb[NDA_DST] == NULL)
1687 goto out;
1689 ndm = nlmsg_data(nlh);
1690 if (ndm->ndm_ifindex) {
1691 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1692 if (dev == NULL) {
1693 err = -ENODEV;
1694 goto out;
1697 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1698 goto out;
1701 read_lock(&neigh_tbl_lock);
1702 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1703 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1704 struct neighbour *neigh;
1705 void *dst, *lladdr;
1707 if (tbl->family != ndm->ndm_family)
1708 continue;
1709 read_unlock(&neigh_tbl_lock);
1711 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1712 goto out;
1713 dst = nla_data(tb[NDA_DST]);
1714 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1716 if (ndm->ndm_flags & NTF_PROXY) {
1717 struct pneigh_entry *pn;
1719 err = -ENOBUFS;
1720 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1721 if (pn) {
1722 pn->flags = ndm->ndm_flags;
1723 err = 0;
1725 goto out;
1728 if (dev == NULL)
1729 goto out;
1731 neigh = neigh_lookup(tbl, dst, dev);
1732 if (neigh == NULL) {
1733 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1734 err = -ENOENT;
1735 goto out;
1738 neigh = __neigh_lookup_errno(tbl, dst, dev);
1739 if (IS_ERR(neigh)) {
1740 err = PTR_ERR(neigh);
1741 goto out;
1743 } else {
1744 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1745 err = -EEXIST;
1746 neigh_release(neigh);
1747 goto out;
1750 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1751 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1754 if (ndm->ndm_flags & NTF_USE) {
1755 neigh_event_send(neigh, NULL);
1756 err = 0;
1757 } else
1758 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1759 neigh_release(neigh);
1760 goto out;
1763 read_unlock(&neigh_tbl_lock);
1764 err = -EAFNOSUPPORT;
1765 out:
1766 return err;
1769 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1771 struct nlattr *nest;
1773 nest = nla_nest_start(skb, NDTA_PARMS);
1774 if (nest == NULL)
1775 return -ENOBUFS;
1777 if ((parms->dev &&
1778 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1779 nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1780 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1781 /* approximate value for the deprecated QUEUE_LEN (in packets) */
1782 nla_put_u32(skb, NDTPA_QUEUE_LEN,
1783 parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1784 nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1785 nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1786 nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1787 nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1788 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1789 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1790 parms->base_reachable_time) ||
1791 nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1792 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1793 parms->delay_probe_time) ||
1794 nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1795 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1796 nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1797 nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1798 goto nla_put_failure;
1799 return nla_nest_end(skb, nest);
1801 nla_put_failure:
1802 nla_nest_cancel(skb, nest);
1803 return -EMSGSIZE;
1806 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1807 u32 pid, u32 seq, int type, int flags)
1809 struct nlmsghdr *nlh;
1810 struct ndtmsg *ndtmsg;
1812 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1813 if (nlh == NULL)
1814 return -EMSGSIZE;
1816 ndtmsg = nlmsg_data(nlh);
1818 read_lock_bh(&tbl->lock);
1819 ndtmsg->ndtm_family = tbl->family;
1820 ndtmsg->ndtm_pad1 = 0;
1821 ndtmsg->ndtm_pad2 = 0;
1823 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1824 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1825 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1826 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1827 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1828 goto nla_put_failure;
1830 unsigned long now = jiffies;
1831 unsigned int flush_delta = now - tbl->last_flush;
1832 unsigned int rand_delta = now - tbl->last_rand;
1833 struct neigh_hash_table *nht;
1834 struct ndt_config ndc = {
1835 .ndtc_key_len = tbl->key_len,
1836 .ndtc_entry_size = tbl->entry_size,
1837 .ndtc_entries = atomic_read(&tbl->entries),
1838 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1839 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1840 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1843 rcu_read_lock_bh();
1844 nht = rcu_dereference_bh(tbl->nht);
1845 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1846 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1847 rcu_read_unlock_bh();
1849 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1850 goto nla_put_failure;
1854 int cpu;
1855 struct ndt_stats ndst;
1857 memset(&ndst, 0, sizeof(ndst));
1859 for_each_possible_cpu(cpu) {
1860 struct neigh_statistics *st;
1862 st = per_cpu_ptr(tbl->stats, cpu);
1863 ndst.ndts_allocs += st->allocs;
1864 ndst.ndts_destroys += st->destroys;
1865 ndst.ndts_hash_grows += st->hash_grows;
1866 ndst.ndts_res_failed += st->res_failed;
1867 ndst.ndts_lookups += st->lookups;
1868 ndst.ndts_hits += st->hits;
1869 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
1870 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
1871 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
1872 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1875 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1876 goto nla_put_failure;
1879 BUG_ON(tbl->parms.dev);
1880 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1881 goto nla_put_failure;
1883 read_unlock_bh(&tbl->lock);
1884 return nlmsg_end(skb, nlh);
1886 nla_put_failure:
1887 read_unlock_bh(&tbl->lock);
1888 nlmsg_cancel(skb, nlh);
1889 return -EMSGSIZE;
1892 static int neightbl_fill_param_info(struct sk_buff *skb,
1893 struct neigh_table *tbl,
1894 struct neigh_parms *parms,
1895 u32 pid, u32 seq, int type,
1896 unsigned int flags)
1898 struct ndtmsg *ndtmsg;
1899 struct nlmsghdr *nlh;
1901 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1902 if (nlh == NULL)
1903 return -EMSGSIZE;
1905 ndtmsg = nlmsg_data(nlh);
1907 read_lock_bh(&tbl->lock);
1908 ndtmsg->ndtm_family = tbl->family;
1909 ndtmsg->ndtm_pad1 = 0;
1910 ndtmsg->ndtm_pad2 = 0;
1912 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1913 neightbl_fill_parms(skb, parms) < 0)
1914 goto errout;
1916 read_unlock_bh(&tbl->lock);
1917 return nlmsg_end(skb, nlh);
1918 errout:
1919 read_unlock_bh(&tbl->lock);
1920 nlmsg_cancel(skb, nlh);
1921 return -EMSGSIZE;
1924 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1925 [NDTA_NAME] = { .type = NLA_STRING },
1926 [NDTA_THRESH1] = { .type = NLA_U32 },
1927 [NDTA_THRESH2] = { .type = NLA_U32 },
1928 [NDTA_THRESH3] = { .type = NLA_U32 },
1929 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1930 [NDTA_PARMS] = { .type = NLA_NESTED },
1933 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1934 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1935 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1936 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1937 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1938 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1939 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1940 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1941 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1942 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1943 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1944 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1945 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1946 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1949 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1951 struct net *net = sock_net(skb->sk);
1952 struct neigh_table *tbl;
1953 struct ndtmsg *ndtmsg;
1954 struct nlattr *tb[NDTA_MAX+1];
1955 int err;
1957 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1958 nl_neightbl_policy);
1959 if (err < 0)
1960 goto errout;
1962 if (tb[NDTA_NAME] == NULL) {
1963 err = -EINVAL;
1964 goto errout;
1967 ndtmsg = nlmsg_data(nlh);
1968 read_lock(&neigh_tbl_lock);
1969 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1970 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1971 continue;
1973 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1974 break;
1977 if (tbl == NULL) {
1978 err = -ENOENT;
1979 goto errout_locked;
1982 /*
1983 * We acquire tbl->lock to be nice to the periodic timers and
1984 * make sure they always see a consistent set of values.
1985 */
1986 write_lock_bh(&tbl->lock);
1988 if (tb[NDTA_PARMS]) {
1989 struct nlattr *tbp[NDTPA_MAX+1];
1990 struct neigh_parms *p;
1991 int i, ifindex = 0;
1993 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1994 nl_ntbl_parm_policy);
1995 if (err < 0)
1996 goto errout_tbl_lock;
1998 if (tbp[NDTPA_IFINDEX])
1999 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2001 p = lookup_neigh_parms(tbl, net, ifindex);
2002 if (p == NULL) {
2003 err = -ENOENT;
2004 goto errout_tbl_lock;
2007 for (i = 1; i <= NDTPA_MAX; i++) {
2008 if (tbp[i] == NULL)
2009 continue;
2011 switch (i) {
2012 case NDTPA_QUEUE_LEN:
2013 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2014 SKB_TRUESIZE(ETH_FRAME_LEN);
2015 break;
2016 case NDTPA_QUEUE_LENBYTES:
2017 p->queue_len_bytes = nla_get_u32(tbp[i]);
2018 break;
2019 case NDTPA_PROXY_QLEN:
2020 p->proxy_qlen = nla_get_u32(tbp[i]);
2021 break;
2022 case NDTPA_APP_PROBES:
2023 p->app_probes = nla_get_u32(tbp[i]);
2024 break;
2025 case NDTPA_UCAST_PROBES:
2026 p->ucast_probes = nla_get_u32(tbp[i]);
2027 break;
2028 case NDTPA_MCAST_PROBES:
2029 p->mcast_probes = nla_get_u32(tbp[i]);
2030 break;
2031 case NDTPA_BASE_REACHABLE_TIME:
2032 p->base_reachable_time = nla_get_msecs(tbp[i]);
2033 break;
2034 case NDTPA_GC_STALETIME:
2035 p->gc_staletime = nla_get_msecs(tbp[i]);
2036 break;
2037 case NDTPA_DELAY_PROBE_TIME:
2038 p->delay_probe_time = nla_get_msecs(tbp[i]);
2039 break;
2040 case NDTPA_RETRANS_TIME:
2041 p->retrans_time = nla_get_msecs(tbp[i]);
2042 break;
2043 case NDTPA_ANYCAST_DELAY:
2044 p->anycast_delay = nla_get_msecs(tbp[i]);
2045 break;
2046 case NDTPA_PROXY_DELAY:
2047 p->proxy_delay = nla_get_msecs(tbp[i]);
2048 break;
2049 case NDTPA_LOCKTIME:
2050 p->locktime = nla_get_msecs(tbp[i]);
2051 break;
2056 if (tb[NDTA_THRESH1])
2057 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2059 if (tb[NDTA_THRESH2])
2060 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2062 if (tb[NDTA_THRESH3])
2063 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2065 if (tb[NDTA_GC_INTERVAL])
2066 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2068 err = 0;
2070 errout_tbl_lock:
2071 write_unlock_bh(&tbl->lock);
2072 errout_locked:
2073 read_unlock(&neigh_tbl_lock);
2074 errout:
2075 return err;
2078 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2080 struct net *net = sock_net(skb->sk);
2081 int family, tidx, nidx = 0;
2082 int tbl_skip = cb->args[0];
2083 int neigh_skip = cb->args[1];
2084 struct neigh_table *tbl;
2086 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2088 read_lock(&neigh_tbl_lock);
2089 for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2090 struct neigh_parms *p;
2092 if (tidx < tbl_skip || (family && tbl->family != family))
2093 continue;
2095 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2096 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2097 NLM_F_MULTI) <= 0)
2098 break;
2100 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2101 if (!net_eq(neigh_parms_net(p), net))
2102 continue;
2104 if (nidx < neigh_skip)
2105 goto next;
2107 if (neightbl_fill_param_info(skb, tbl, p,
2108 NETLINK_CB(cb->skb).portid,
2109 cb->nlh->nlmsg_seq,
2110 RTM_NEWNEIGHTBL,
2111 NLM_F_MULTI) <= 0)
2112 goto out;
2113 next:
2114 nidx++;
2117 neigh_skip = 0;
2119 out:
2120 read_unlock(&neigh_tbl_lock);
2121 cb->args[0] = tidx;
2122 cb->args[1] = nidx;
2124 return skb->len;
2127 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2128 u32 pid, u32 seq, int type, unsigned int flags)
2130 unsigned long now = jiffies;
2131 struct nda_cacheinfo ci;
2132 struct nlmsghdr *nlh;
2133 struct ndmsg *ndm;
2135 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2136 if (nlh == NULL)
2137 return -EMSGSIZE;
2139 ndm = nlmsg_data(nlh);
2140 ndm->ndm_family = neigh->ops->family;
2141 ndm->ndm_pad1 = 0;
2142 ndm->ndm_pad2 = 0;
2143 ndm->ndm_flags = neigh->flags;
2144 ndm->ndm_type = neigh->type;
2145 ndm->ndm_ifindex = neigh->dev->ifindex;
2147 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2148 goto nla_put_failure;
2150 read_lock_bh(&neigh->lock);
2151 ndm->ndm_state = neigh->nud_state;
2152 if (neigh->nud_state & NUD_VALID) {
2153 char haddr[MAX_ADDR_LEN];
2155 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2156 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2157 read_unlock_bh(&neigh->lock);
2158 goto nla_put_failure;
2162 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2163 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2164 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2165 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
2166 read_unlock_bh(&neigh->lock);
2168 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2169 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2170 goto nla_put_failure;
2172 return nlmsg_end(skb, nlh);
2174 nla_put_failure:
2175 nlmsg_cancel(skb, nlh);
2176 return -EMSGSIZE;
2179 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2180 u32 pid, u32 seq, int type, unsigned int flags,
2181 struct neigh_table *tbl)
2183 struct nlmsghdr *nlh;
2184 struct ndmsg *ndm;
2186 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2187 if (nlh == NULL)
2188 return -EMSGSIZE;
2190 ndm = nlmsg_data(nlh);
2191 ndm->ndm_family = tbl->family;
2192 ndm->ndm_pad1 = 0;
2193 ndm->ndm_pad2 = 0;
2194 ndm->ndm_flags = pn->flags | NTF_PROXY;
2195 ndm->ndm_type = NDA_DST;
2196 ndm->ndm_ifindex = pn->dev->ifindex;
2197 ndm->ndm_state = NUD_NONE;
2199 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2200 goto nla_put_failure;
2202 return nlmsg_end(skb, nlh);
2204 nla_put_failure:
2205 nlmsg_cancel(skb, nlh);
2206 return -EMSGSIZE;
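/* Fan out a neighbour change: in-kernel listeners hear it on the
 * netevent chain, userspace via an RTM_NEWNEIGH multicast.
 */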
2209 static void neigh_update_notify(struct neighbour *neigh)
2211 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2212 __neigh_notify(neigh, RTM_NEWNEIGH, 0);
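/* Walk one table's hash buckets under rcu_read_lock_bh() and emit an
 * entry for every neighbour belonging to the requesting netns.
 * cb->args[1] and cb->args[2] record the bucket and in-bucket index
 * where a partial dump stopped.
 */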
2215 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2216 struct netlink_callback *cb)
2218 struct net *net = sock_net(skb->sk);
2219 struct neighbour *n;
2220 int rc, h, s_h = cb->args[1];
2221 int idx, s_idx = idx = cb->args[2];
2222 struct neigh_hash_table *nht;
2224 rcu_read_lock_bh();
2225 nht = rcu_dereference_bh(tbl->nht);
2227 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2228 if (h > s_h)
2229 s_idx = 0;
2230 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2231 n != NULL;
2232 n = rcu_dereference_bh(n->next)) {
2233 if (!net_eq(dev_net(n->dev), net))
2234 continue;
2235 if (idx < s_idx)
2236 goto next;
2237 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2238 cb->nlh->nlmsg_seq,
2239 RTM_NEWNEIGH,
2240 NLM_F_MULTI) <= 0) {
2241 rc = -1;
2242 goto out;
2244 next:
2245 idx++;
2248 rc = skb->len;
2249 out:
2250 rcu_read_unlock_bh();
2251 cb->args[1] = h;
2252 cb->args[2] = idx;
2253 return rc;
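/* As neigh_dump_table(), but for proxy entries, which live in the
 * separate phash_buckets[] array protected by tbl->lock rather than
 * RCU. Resume state sits in cb->args[3] and cb->args[4].
 */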
2256 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2257 struct netlink_callback *cb)
2259 struct pneigh_entry *n;
2260 struct net *net = sock_net(skb->sk);
2261 int rc, h, s_h = cb->args[3];
2262 int idx, s_idx = idx = cb->args[4];
2264 read_lock_bh(&tbl->lock);
2266 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2267 if (h > s_h)
2268 s_idx = 0;
2269 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2270 if (dev_net(n->dev) != net)
2271 continue;
2272 if (idx < s_idx)
2273 goto next;
2274 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2275 cb->nlh->nlmsg_seq,
2276 RTM_NEWNEIGH,
2277 NLM_F_MULTI, tbl) <= 0) {
2278 read_unlock_bh(&tbl->lock);
2279 rc = -1;
2280 goto out;
2282 next:
2283 idx++;
2287 read_unlock_bh(&tbl->lock);
2288 rc = skb->len;
2289 out:
2290 cb->args[3] = h;
2291 cb->args[4] = idx;
2292 return rc;
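/* RTM_GETNEIGH dump entry point. A request carrying a full ndmsg
 * whose ndm_flags equal NTF_PROXY selects the proxy tables;
 * otherwise the ordinary neighbour entries are dumped.
 */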
2296 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2298 struct neigh_table *tbl;
2299 int t, family, s_t;
2300 int proxy = 0;
2301 int err;
2303 read_lock(&neigh_tbl_lock);
2304 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2306 /* check for full ndmsg structure presence, family member is
2307 * the same for both structures
2308 */
2309 if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2310 ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2311 proxy = 1;
2313 s_t = cb->args[0];
2315 for (tbl = neigh_tables, t = 0; tbl;
2316 tbl = tbl->next, t++) {
2317 if (t < s_t || (family && tbl->family != family))
2318 continue;
2319 if (t > s_t)
2320 memset(&cb->args[1], 0, sizeof(cb->args) -
2321 sizeof(cb->args[0]));
2322 if (proxy)
2323 err = pneigh_dump_table(tbl, skb, cb);
2324 else
2325 err = neigh_dump_table(tbl, skb, cb);
2326 if (err < 0)
2327 break;
2329 read_unlock(&neigh_tbl_lock);
2331 cb->args[0] = t;
2332 return skb->len;
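/* Invoke cb() on every neighbour in the table. tbl->lock is taken as
 * a reader purely to keep the hash from being resized mid-walk, and
 * cb() runs under it with BHs disabled, so it must not sleep.
 */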
2335 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2337 int chain;
2338 struct neigh_hash_table *nht;
2340 rcu_read_lock_bh();
2341 nht = rcu_dereference_bh(tbl->nht);
2343 read_lock(&tbl->lock); /* avoid resizes */
2344 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2345 struct neighbour *n;
2347 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2348 n != NULL;
2349 n = rcu_dereference_bh(n->next))
2350 cb(n, cookie);
2352 read_unlock(&tbl->lock);
2353 rcu_read_unlock_bh();
2355 EXPORT_SYMBOL(neigh_for_each);
2357 /* The tbl->lock must be held as a writer and BH disabled. */
2358 void __neigh_for_each_release(struct neigh_table *tbl,
2359 int (*cb)(struct neighbour *))
2361 int chain;
2362 struct neigh_hash_table *nht;
2364 nht = rcu_dereference_protected(tbl->nht,
2365 lockdep_is_held(&tbl->lock));
2366 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2367 struct neighbour *n;
2368 struct neighbour __rcu **np;
2370 np = &nht->hash_buckets[chain];
2371 while ((n = rcu_dereference_protected(*np,
2372 lockdep_is_held(&tbl->lock))) != NULL) {
2373 int release;
2375 write_lock(&n->lock);
2376 release = cb(n);
2377 if (release) {
2378 rcu_assign_pointer(*np,
2379 rcu_dereference_protected(n->next,
2380 lockdep_is_held(&tbl->lock)));
2381 n->dead = 1;
2382 } else
2383 np = &n->next;
2384 write_unlock(&n->lock);
2385 if (release)
2386 neigh_cleanup_and_release(n);
2390 EXPORT_SYMBOL(__neigh_for_each_release);
2392 #ifdef CONFIG_PROC_FS
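/* seq_file iteration over a neighbour table: ordinary entries are
 * walked first, then (unless NEIGH_SEQ_NEIGH_ONLY) the proxy entries.
 * state->bucket and the NEIGH_SEQ_IS_PNEIGH flag record which phase
 * and chain the cursor is in.
 */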
2394 static struct neighbour *neigh_get_first(struct seq_file *seq)
2396 struct neigh_seq_state *state = seq->private;
2397 struct net *net = seq_file_net(seq);
2398 struct neigh_hash_table *nht = state->nht;
2399 struct neighbour *n = NULL;
2400 int bucket = state->bucket;
2402 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2403 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2404 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2406 while (n) {
2407 if (!net_eq(dev_net(n->dev), net))
2408 goto next;
2409 if (state->neigh_sub_iter) {
2410 loff_t fakep = 0;
2411 void *v;
2413 v = state->neigh_sub_iter(state, n, &fakep);
2414 if (!v)
2415 goto next;
2417 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2418 break;
2419 if (n->nud_state & ~NUD_NOARP)
2420 break;
2421 next:
2422 n = rcu_dereference_bh(n->next);
2425 if (n)
2426 break;
2428 state->bucket = bucket;
2430 return n;
2433 static struct neighbour *neigh_get_next(struct seq_file *seq,
2434 struct neighbour *n,
2435 loff_t *pos)
2437 struct neigh_seq_state *state = seq->private;
2438 struct net *net = seq_file_net(seq);
2439 struct neigh_hash_table *nht = state->nht;
2441 if (state->neigh_sub_iter) {
2442 void *v = state->neigh_sub_iter(state, n, pos);
2443 if (v)
2444 return n;
2446 n = rcu_dereference_bh(n->next);
2448 while (1) {
2449 while (n) {
2450 if (!net_eq(dev_net(n->dev), net))
2451 goto next;
2452 if (state->neigh_sub_iter) {
2453 void *v = state->neigh_sub_iter(state, n, pos);
2454 if (v)
2455 return n;
2456 goto next;
2458 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2459 break;
2461 if (n->nud_state & ~NUD_NOARP)
2462 break;
2463 next:
2464 n = rcu_dereference_bh(n->next);
2467 if (n)
2468 break;
2470 if (++state->bucket >= (1 << nht->hash_shift))
2471 break;
2473 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2476 if (n && pos)
2477 --(*pos);
2478 return n;
2481 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2483 struct neighbour *n = neigh_get_first(seq);
2485 if (n) {
2486 --(*pos);
2487 while (*pos) {
2488 n = neigh_get_next(seq, n, pos);
2489 if (!n)
2490 break;
2493 return *pos ? NULL : n;
2496 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2498 struct neigh_seq_state *state = seq->private;
2499 struct net *net = seq_file_net(seq);
2500 struct neigh_table *tbl = state->tbl;
2501 struct pneigh_entry *pn = NULL;
2502 int bucket = state->bucket;
2504 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2505 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2506 pn = tbl->phash_buckets[bucket];
2507 while (pn && !net_eq(pneigh_net(pn), net))
2508 pn = pn->next;
2509 if (pn)
2510 break;
2512 state->bucket = bucket;
2514 return pn;
2517 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2518 struct pneigh_entry *pn,
2519 loff_t *pos)
2521 struct neigh_seq_state *state = seq->private;
2522 struct net *net = seq_file_net(seq);
2523 struct neigh_table *tbl = state->tbl;
2525 do {
2526 pn = pn->next;
2527 } while (pn && !net_eq(pneigh_net(pn), net));
2529 while (!pn) {
2530 if (++state->bucket > PNEIGH_HASHMASK)
2531 break;
2532 pn = tbl->phash_buckets[state->bucket];
2533 while (pn && !net_eq(pneigh_net(pn), net))
2534 pn = pn->next;
2535 if (pn)
2536 break;
2539 if (pn && pos)
2540 --(*pos);
2542 return pn;
2545 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2547 struct pneigh_entry *pn = pneigh_get_first(seq);
2549 if (pn) {
2550 --(*pos);
2551 while (*pos) {
2552 pn = pneigh_get_next(seq, pn, pos);
2553 if (!pn)
2554 break;
2557 return *pos ? NULL : pn;
2560 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2562 struct neigh_seq_state *state = seq->private;
2563 void *rc;
2564 loff_t idxpos = *pos;
2566 rc = neigh_get_idx(seq, &idxpos);
2567 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2568 rc = pneigh_get_idx(seq, &idxpos);
2570 return rc;
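/* Common seq_file start callback for the protocol /proc tables built
 * on this cache (ARP being the usual example). It takes
 * rcu_read_lock_bh(), which neigh_seq_stop() releases; a zero *pos
 * yields SEQ_START_TOKEN so callers can print a header line first.
 */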
2573 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2574 __acquires(rcu_bh)
2576 struct neigh_seq_state *state = seq->private;
2578 state->tbl = tbl;
2579 state->bucket = 0;
2580 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2582 rcu_read_lock_bh();
2583 state->nht = rcu_dereference_bh(tbl->nht);
2585 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2587 EXPORT_SYMBOL(neigh_seq_start);
2589 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2591 struct neigh_seq_state *state;
2592 void *rc;
2594 if (v == SEQ_START_TOKEN) {
2595 rc = neigh_get_first(seq);
2596 goto out;
2599 state = seq->private;
2600 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2601 rc = neigh_get_next(seq, v, NULL);
2602 if (rc)
2603 goto out;
2604 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2605 rc = pneigh_get_first(seq);
2606 } else {
2607 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2608 rc = pneigh_get_next(seq, v, NULL);
2610 out:
2611 ++(*pos);
2612 return rc;
2614 EXPORT_SYMBOL(neigh_seq_next);
2616 void neigh_seq_stop(struct seq_file *seq, void *v)
2617 __releases(rcu_bh)
2619 rcu_read_unlock_bh();
2621 EXPORT_SYMBOL(neigh_seq_stop);
2623 /* statistics via seq_file */
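/* The *pos cursor maps to CPU ids shifted by one: position 0 is the
 * header token, position n + 1 the per-cpu statistics block of CPU n,
 * skipping ids that are not possible on this machine.
 */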
2625 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2627 struct neigh_table *tbl = seq->private;
2628 int cpu;
2630 if (*pos == 0)
2631 return SEQ_START_TOKEN;
2633 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2634 if (!cpu_possible(cpu))
2635 continue;
2636 *pos = cpu+1;
2637 return per_cpu_ptr(tbl->stats, cpu);
2639 return NULL;
2642 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2644 struct neigh_table *tbl = seq->private;
2645 int cpu;
2647 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2648 if (!cpu_possible(cpu))
2649 continue;
2650 *pos = cpu+1;
2651 return per_cpu_ptr(tbl->stats, cpu);
2653 return NULL;
2656 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2661 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2663 struct neigh_table *tbl = seq->private;
2664 struct neigh_statistics *st = v;
2666 if (v == SEQ_START_TOKEN) {
2667 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
2668 return 0;
2671 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
2672 "%08lx %08lx %08lx %08lx %08lx\n",
2673 atomic_read(&tbl->entries),
2675 st->allocs,
2676 st->destroys,
2677 st->hash_grows,
2679 st->lookups,
2680 st->hits,
2682 st->res_failed,
2684 st->rcv_probes_mcast,
2685 st->rcv_probes_ucast,
2687 st->periodic_gc_runs,
2688 st->forced_gc_runs,
2689 st->unres_discards
2692 return 0;
2695 static const struct seq_operations neigh_stat_seq_ops = {
2696 .start = neigh_stat_seq_start,
2697 .next = neigh_stat_seq_next,
2698 .stop = neigh_stat_seq_stop,
2699 .show = neigh_stat_seq_show,
2702 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2704 int ret = seq_open(file, &neigh_stat_seq_ops);
2706 if (!ret) {
2707 struct seq_file *sf = file->private_data;
2708 sf->private = PDE_DATA(inode);
2710 return ret;
2713 static const struct file_operations neigh_stat_seq_fops = {
2714 .owner = THIS_MODULE,
2715 .open = neigh_stat_seq_open,
2716 .read = seq_read,
2717 .llseek = seq_lseek,
2718 .release = seq_release,
2721 #endif /* CONFIG_PROC_FS */
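/* Worst-case message size for one neighbour: the ndmsg header plus
 * the four attributes neigh_fill_info() may emit, with both addresses
 * sized at MAX_ADDR_LEN. __neigh_notify() allocates with this, which
 * is why a later -EMSGSIZE is treated as a bug in the estimate.
 */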
2723 static inline size_t neigh_nlmsg_size(void)
2725 return NLMSG_ALIGN(sizeof(struct ndmsg))
2726 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2727 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2728 + nla_total_size(sizeof(struct nda_cacheinfo))
2729 + nla_total_size(4); /* NDA_PROBES */
2732 static void __neigh_notify(struct neighbour *n, int type, int flags)
2734 struct net *net = dev_net(n->dev);
2735 struct sk_buff *skb;
2736 int err = -ENOBUFS;
2738 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2739 if (skb == NULL)
2740 goto errout;
2742 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2743 if (err < 0) {
2744 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2745 WARN_ON(err == -EMSGSIZE);
2746 kfree_skb(skb);
2747 goto errout;
2749 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2750 return;
2751 errout:
2752 if (err < 0)
2753 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2756 #ifdef CONFIG_ARPD
2757 void neigh_app_ns(struct neighbour *n)
2759 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2761 EXPORT_SYMBOL(neigh_app_ns);
2762 #endif /* CONFIG_ARPD */
2764 #ifdef CONFIG_SYSCTL
2765 static int zero;
2766 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
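/* Handler for the legacy packet-count "unres_qlen" sysctl: the value
 * is actually stored in bytes (queue_len_bytes), so reads and writes
 * are converted at SKB_TRUESIZE(ETH_FRAME_LEN) bytes per packet.
 */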
2768 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2769 size_t *lenp, loff_t *ppos)
2771 int size, ret;
2772 ctl_table tmp = *ctl;
2774 tmp.extra1 = &zero;
2775 tmp.extra2 = &unres_qlen_max;
2776 tmp.data = &size;
2778 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2779 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2781 if (write && !ret)
2782 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2783 return ret;
2786 enum {
2787 NEIGH_VAR_MCAST_PROBE,
2788 NEIGH_VAR_UCAST_PROBE,
2789 NEIGH_VAR_APP_PROBE,
2790 NEIGH_VAR_RETRANS_TIME,
2791 NEIGH_VAR_BASE_REACHABLE_TIME,
2792 NEIGH_VAR_DELAY_PROBE_TIME,
2793 NEIGH_VAR_GC_STALETIME,
2794 NEIGH_VAR_QUEUE_LEN,
2795 NEIGH_VAR_QUEUE_LEN_BYTES,
2796 NEIGH_VAR_PROXY_QLEN,
2797 NEIGH_VAR_ANYCAST_DELAY,
2798 NEIGH_VAR_PROXY_DELAY,
2799 NEIGH_VAR_LOCKTIME,
2800 NEIGH_VAR_RETRANS_TIME_MS,
2801 NEIGH_VAR_BASE_REACHABLE_TIME_MS,
2802 NEIGH_VAR_GC_INTERVAL,
2803 NEIGH_VAR_GC_THRESH1,
2804 NEIGH_VAR_GC_THRESH2,
2805 NEIGH_VAR_GC_THRESH3,
2806 NEIGH_VAR_MAX
2809 static struct neigh_sysctl_table {
2810 struct ctl_table_header *sysctl_header;
2811 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2812 } neigh_sysctl_template __read_mostly = {
2813 .neigh_vars = {
2814 [NEIGH_VAR_MCAST_PROBE] = {
2815 .procname = "mcast_solicit",
2816 .maxlen = sizeof(int),
2817 .mode = 0644,
2818 .proc_handler = proc_dointvec,
2820 [NEIGH_VAR_UCAST_PROBE] = {
2821 .procname = "ucast_solicit",
2822 .maxlen = sizeof(int),
2823 .mode = 0644,
2824 .proc_handler = proc_dointvec,
2826 [NEIGH_VAR_APP_PROBE] = {
2827 .procname = "app_solicit",
2828 .maxlen = sizeof(int),
2829 .mode = 0644,
2830 .proc_handler = proc_dointvec,
2832 [NEIGH_VAR_RETRANS_TIME] = {
2833 .procname = "retrans_time",
2834 .maxlen = sizeof(int),
2835 .mode = 0644,
2836 .proc_handler = proc_dointvec_userhz_jiffies,
2838 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2839 .procname = "base_reachable_time",
2840 .maxlen = sizeof(int),
2841 .mode = 0644,
2842 .proc_handler = proc_dointvec_jiffies,
2844 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2845 .procname = "delay_first_probe_time",
2846 .maxlen = sizeof(int),
2847 .mode = 0644,
2848 .proc_handler = proc_dointvec_jiffies,
2850 [NEIGH_VAR_GC_STALETIME] = {
2851 .procname = "gc_stale_time",
2852 .maxlen = sizeof(int),
2853 .mode = 0644,
2854 .proc_handler = proc_dointvec_jiffies,
2856 [NEIGH_VAR_QUEUE_LEN] = {
2857 .procname = "unres_qlen",
2858 .maxlen = sizeof(int),
2859 .mode = 0644,
2860 .proc_handler = proc_unres_qlen,
2862 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2863 .procname = "unres_qlen_bytes",
2864 .maxlen = sizeof(int),
2865 .mode = 0644,
2866 .extra1 = &zero,
2867 .proc_handler = proc_dointvec_minmax,
2869 [NEIGH_VAR_PROXY_QLEN] = {
2870 .procname = "proxy_qlen",
2871 .maxlen = sizeof(int),
2872 .mode = 0644,
2873 .proc_handler = proc_dointvec,
2875 [NEIGH_VAR_ANYCAST_DELAY] = {
2876 .procname = "anycast_delay",
2877 .maxlen = sizeof(int),
2878 .mode = 0644,
2879 .proc_handler = proc_dointvec_userhz_jiffies,
2881 [NEIGH_VAR_PROXY_DELAY] = {
2882 .procname = "proxy_delay",
2883 .maxlen = sizeof(int),
2884 .mode = 0644,
2885 .proc_handler = proc_dointvec_userhz_jiffies,
2887 [NEIGH_VAR_LOCKTIME] = {
2888 .procname = "locktime",
2889 .maxlen = sizeof(int),
2890 .mode = 0644,
2891 .proc_handler = proc_dointvec_userhz_jiffies,
2893 [NEIGH_VAR_RETRANS_TIME_MS] = {
2894 .procname = "retrans_time_ms",
2895 .maxlen = sizeof(int),
2896 .mode = 0644,
2897 .proc_handler = proc_dointvec_ms_jiffies,
2899 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2900 .procname = "base_reachable_time_ms",
2901 .maxlen = sizeof(int),
2902 .mode = 0644,
2903 .proc_handler = proc_dointvec_ms_jiffies,
2905 [NEIGH_VAR_GC_INTERVAL] = {
2906 .procname = "gc_interval",
2907 .maxlen = sizeof(int),
2908 .mode = 0644,
2909 .proc_handler = proc_dointvec_jiffies,
2911 [NEIGH_VAR_GC_THRESH1] = {
2912 .procname = "gc_thresh1",
2913 .maxlen = sizeof(int),
2914 .mode = 0644,
2915 .proc_handler = proc_dointvec,
2917 [NEIGH_VAR_GC_THRESH2] = {
2918 .procname = "gc_thresh2",
2919 .maxlen = sizeof(int),
2920 .mode = 0644,
2921 .proc_handler = proc_dointvec,
2923 [NEIGH_VAR_GC_THRESH3] = {
2924 .procname = "gc_thresh3",
2925 .maxlen = sizeof(int),
2926 .mode = 0644,
2927 .proc_handler = proc_dointvec,
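/* Register the per-device (or per-protocol "default") neigh sysctl
 * directory. The template above is kmemdup()ed and its .data pointers
 * rewired to this neigh_parms instance; for a real device the copy is
 * truncated before the gc_* entries, which are per-table values that
 * only the "default" directory exposes.
 */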
2933 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2934 char *p_name, proc_handler *handler)
2936 struct neigh_sysctl_table *t;
2937 const char *dev_name_source = NULL;
2938 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2940 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2941 if (!t)
2942 goto err;
2944 t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data = &p->mcast_probes;
2945 t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data = &p->ucast_probes;
2946 t->neigh_vars[NEIGH_VAR_APP_PROBE].data = &p->app_probes;
2947 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data = &p->retrans_time;
2948 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data = &p->base_reachable_time;
2949 t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data = &p->delay_probe_time;
2950 t->neigh_vars[NEIGH_VAR_GC_STALETIME].data = &p->gc_staletime;
2951 t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data = &p->queue_len_bytes;
2952 t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data = &p->queue_len_bytes;
2953 t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data = &p->proxy_qlen;
2954 t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data = &p->anycast_delay;
2955 t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2956 t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2957 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data = &p->retrans_time;
2958 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data = &p->base_reachable_time;
2960 if (dev) {
2961 dev_name_source = dev->name;
2962 /* Terminate the table early */
2963 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2964 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2965 } else {
2966 dev_name_source = "default";
2967 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2968 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2969 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2970 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2974 if (handler) {
2975 /* RetransTime */
2976 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2977 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2978 /* ReachableTime */
2979 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2980 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2981 /* RetransTime (in milliseconds) */
2982 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2983 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2984 /* ReachableTime (in milliseconds) */
2985 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2986 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2989 /* Don't export sysctls to unprivileged users */
2990 if (neigh_parms_net(p)->user_ns != &init_user_ns)
2991 t->neigh_vars[0].procname = NULL;
2993 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2994 p_name, dev_name_source);
2995 t->sysctl_header =
2996 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
2997 if (!t->sysctl_header)
2998 goto free;
3000 p->sysctl_table = t;
3001 return 0;
3003 free:
3004 kfree(t);
3005 err:
3006 return -ENOBUFS;
3008 EXPORT_SYMBOL(neigh_sysctl_register);
3010 void neigh_sysctl_unregister(struct neigh_parms *p)
3012 if (p->sysctl_table) {
3013 struct neigh_sysctl_table *t = p->sysctl_table;
3014 p->sysctl_table = NULL;
3015 unregister_net_sysctl_table(t->sysctl_header);
3016 kfree(t);
3019 EXPORT_SYMBOL(neigh_sysctl_unregister);
3021 #endif /* CONFIG_SYSCTL */
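/* Wire the neighbour netlink operations into rtnetlink at boot: doit
 * handlers for add, delete and table tuning, dumpit handlers for the
 * GET side, which is dump-only here.
 */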
3023 static int __init neigh_init(void)
3025 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3026 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3027 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3029 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3030 NULL);
3031 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3033 return 0;
3036 subsys_initcall(neigh_init);