Linux 2.6.24.5
[linux/fpc-iii.git] / net / core / neighbour.c
blob29b8ee4e35d6dc53f32064c1c067c7287ce80924
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
24 #ifdef CONFIG_SYSCTL
25 #include <linux/sysctl.h>
26 #endif
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
30 #include <net/dst.h>
31 #include <net/sock.h>
32 #include <net/netevent.h>
33 #include <net/netlink.h>
34 #include <linux/rtnetlink.h>
35 #include <linux/random.h>
36 #include <linux/string.h>
37 #include <linux/log2.h>
/*
 * Debug output levels.  NEIGH_PRINTK0 always prints; NEIGH_PRINTK1 and
 * NEIGH_PRINTK2 expand to printk() only when NEIGH_DEBUG is at least
 * 1 or 2 respectively, otherwise they expand to an empty statement.
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK0 NEIGH_PRINTK
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

/* Mask applied to the proxy-neighbour hash value (16 buckets). */
#define PNEIGH_HASHMASK		0xF
/* Forward declarations for functions used before their definition. */
static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);

/* Head of the list of neighbour tables (see neigh_tbl_lock). */
static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be made under neigh->lock,
   the most complicated procedure, which we allow is dev->hard_header.
   It is supposed, that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
   list of neighbour tables. This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);
102 static int neigh_blackhole(struct sk_buff *skb)
104 kfree_skb(skb);
105 return -ENETDOWN;
108 static void neigh_cleanup_and_release(struct neighbour *neigh)
110 if (neigh->parms->neigh_cleanup)
111 neigh->parms->neigh_cleanup(neigh);
113 __neigh_notify(neigh, RTM_DELNEIGH, 0);
114 neigh_release(neigh);
/*
 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really reasonable choice.
 *
 * Returns 0 when @base is 0 (avoids the modulo by zero).
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return (base ? (net_random() % base) + (base >> 1) : 0);
}
129 static int neigh_forced_gc(struct neigh_table *tbl)
131 int shrunk = 0;
132 int i;
134 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136 write_lock_bh(&tbl->lock);
137 for (i = 0; i <= tbl->hash_mask; i++) {
138 struct neighbour *n, **np;
140 np = &tbl->hash_buckets[i];
141 while ((n = *np) != NULL) {
142 /* Neighbour record may be discarded if:
143 * - nobody refers to it.
144 * - it is not permanent
146 write_lock(&n->lock);
147 if (atomic_read(&n->refcnt) == 1 &&
148 !(n->nud_state & NUD_PERMANENT)) {
149 *np = n->next;
150 n->dead = 1;
151 shrunk = 1;
152 write_unlock(&n->lock);
153 neigh_cleanup_and_release(n);
154 continue;
156 write_unlock(&n->lock);
157 np = &n->next;
161 tbl->last_flush = jiffies;
163 write_unlock_bh(&tbl->lock);
165 return shrunk;
168 static int neigh_del_timer(struct neighbour *n)
170 if ((n->nud_state & NUD_IN_TIMER) &&
171 del_timer(&n->timer)) {
172 neigh_release(n);
173 return 1;
175 return 0;
178 static void pneigh_queue_purge(struct sk_buff_head *list)
180 struct sk_buff *skb;
182 while ((skb = skb_dequeue(list)) != NULL) {
183 dev_put(skb->dev);
184 kfree_skb(skb);
188 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
190 int i;
192 for (i = 0; i <= tbl->hash_mask; i++) {
193 struct neighbour *n, **np = &tbl->hash_buckets[i];
195 while ((n = *np) != NULL) {
196 if (dev && n->dev != dev) {
197 np = &n->next;
198 continue;
200 *np = n->next;
201 write_lock(&n->lock);
202 neigh_del_timer(n);
203 n->dead = 1;
205 if (atomic_read(&n->refcnt) != 1) {
206 /* The most unpleasant situation.
207 We must destroy neighbour entry,
208 but someone still uses it.
210 The destroy will be delayed until
211 the last user releases us, but
212 we must kill timers etc. and move
213 it to safe state.
215 skb_queue_purge(&n->arp_queue);
216 n->output = neigh_blackhole;
217 if (n->nud_state & NUD_VALID)
218 n->nud_state = NUD_NOARP;
219 else
220 n->nud_state = NUD_NONE;
221 NEIGH_PRINTK2("neigh %p is stray.\n", n);
223 write_unlock(&n->lock);
224 neigh_cleanup_and_release(n);
229 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
231 write_lock_bh(&tbl->lock);
232 neigh_flush_dev(tbl, dev);
233 write_unlock_bh(&tbl->lock);
236 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
238 write_lock_bh(&tbl->lock);
239 neigh_flush_dev(tbl, dev);
240 pneigh_ifdown(tbl, dev);
241 write_unlock_bh(&tbl->lock);
243 del_timer_sync(&tbl->proxy_timer);
244 pneigh_queue_purge(&tbl->proxy_queue);
245 return 0;
248 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
250 struct neighbour *n = NULL;
251 unsigned long now = jiffies;
252 int entries;
254 entries = atomic_inc_return(&tbl->entries) - 1;
255 if (entries >= tbl->gc_thresh3 ||
256 (entries >= tbl->gc_thresh2 &&
257 time_after(now, tbl->last_flush + 5 * HZ))) {
258 if (!neigh_forced_gc(tbl) &&
259 entries >= tbl->gc_thresh3)
260 goto out_entries;
263 n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
264 if (!n)
265 goto out_entries;
267 skb_queue_head_init(&n->arp_queue);
268 rwlock_init(&n->lock);
269 n->updated = n->used = now;
270 n->nud_state = NUD_NONE;
271 n->output = neigh_blackhole;
272 n->parms = neigh_parms_clone(&tbl->parms);
273 init_timer(&n->timer);
274 n->timer.function = neigh_timer_handler;
275 n->timer.data = (unsigned long)n;
277 NEIGH_CACHE_STAT_INC(tbl, allocs);
278 n->tbl = tbl;
279 atomic_set(&n->refcnt, 1);
280 n->dead = 1;
281 out:
282 return n;
284 out_entries:
285 atomic_dec(&tbl->entries);
286 goto out;
289 static struct neighbour **neigh_hash_alloc(unsigned int entries)
291 unsigned long size = entries * sizeof(struct neighbour *);
292 struct neighbour **ret;
294 if (size <= PAGE_SIZE) {
295 ret = kzalloc(size, GFP_ATOMIC);
296 } else {
297 ret = (struct neighbour **)
298 __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
300 return ret;
303 static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
305 unsigned long size = entries * sizeof(struct neighbour *);
307 if (size <= PAGE_SIZE)
308 kfree(hash);
309 else
310 free_pages((unsigned long)hash, get_order(size));
313 static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
315 struct neighbour **new_hash, **old_hash;
316 unsigned int i, new_hash_mask, old_entries;
318 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
320 BUG_ON(!is_power_of_2(new_entries));
321 new_hash = neigh_hash_alloc(new_entries);
322 if (!new_hash)
323 return;
325 old_entries = tbl->hash_mask + 1;
326 new_hash_mask = new_entries - 1;
327 old_hash = tbl->hash_buckets;
329 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
330 for (i = 0; i < old_entries; i++) {
331 struct neighbour *n, *next;
333 for (n = old_hash[i]; n; n = next) {
334 unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
336 hash_val &= new_hash_mask;
337 next = n->next;
339 n->next = new_hash[hash_val];
340 new_hash[hash_val] = n;
343 tbl->hash_buckets = new_hash;
344 tbl->hash_mask = new_hash_mask;
346 neigh_hash_free(old_hash, old_entries);
349 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
350 struct net_device *dev)
352 struct neighbour *n;
353 int key_len = tbl->key_len;
354 u32 hash_val = tbl->hash(pkey, dev);
356 NEIGH_CACHE_STAT_INC(tbl, lookups);
358 read_lock_bh(&tbl->lock);
359 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
360 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
361 neigh_hold(n);
362 NEIGH_CACHE_STAT_INC(tbl, hits);
363 break;
366 read_unlock_bh(&tbl->lock);
367 return n;
370 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
372 struct neighbour *n;
373 int key_len = tbl->key_len;
374 u32 hash_val = tbl->hash(pkey, NULL);
376 NEIGH_CACHE_STAT_INC(tbl, lookups);
378 read_lock_bh(&tbl->lock);
379 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
380 if (!memcmp(n->primary_key, pkey, key_len)) {
381 neigh_hold(n);
382 NEIGH_CACHE_STAT_INC(tbl, hits);
383 break;
386 read_unlock_bh(&tbl->lock);
387 return n;
390 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
391 struct net_device *dev)
393 u32 hash_val;
394 int key_len = tbl->key_len;
395 int error;
396 struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
398 if (!n) {
399 rc = ERR_PTR(-ENOBUFS);
400 goto out;
403 memcpy(n->primary_key, pkey, key_len);
404 n->dev = dev;
405 dev_hold(dev);
407 /* Protocol specific setup. */
408 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
409 rc = ERR_PTR(error);
410 goto out_neigh_release;
413 /* Device specific setup. */
414 if (n->parms->neigh_setup &&
415 (error = n->parms->neigh_setup(n)) < 0) {
416 rc = ERR_PTR(error);
417 goto out_neigh_release;
420 n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
422 write_lock_bh(&tbl->lock);
424 if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
425 neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
427 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
429 if (n->parms->dead) {
430 rc = ERR_PTR(-EINVAL);
431 goto out_tbl_unlock;
434 for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
435 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
436 neigh_hold(n1);
437 rc = n1;
438 goto out_tbl_unlock;
442 n->next = tbl->hash_buckets[hash_val];
443 tbl->hash_buckets[hash_val] = n;
444 n->dead = 0;
445 neigh_hold(n);
446 write_unlock_bh(&tbl->lock);
447 NEIGH_PRINTK2("neigh %p is created.\n", n);
448 rc = n;
449 out:
450 return rc;
451 out_tbl_unlock:
452 write_unlock_bh(&tbl->lock);
453 out_neigh_release:
454 neigh_release(n);
455 goto out;
458 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
459 struct net_device *dev, int creat)
461 struct pneigh_entry *n;
462 int key_len = tbl->key_len;
463 u32 hash_val = *(u32 *)(pkey + key_len - 4);
465 hash_val ^= (hash_val >> 16);
466 hash_val ^= hash_val >> 8;
467 hash_val ^= hash_val >> 4;
468 hash_val &= PNEIGH_HASHMASK;
470 read_lock_bh(&tbl->lock);
472 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
473 if (!memcmp(n->key, pkey, key_len) &&
474 (n->dev == dev || !n->dev)) {
475 read_unlock_bh(&tbl->lock);
476 goto out;
479 read_unlock_bh(&tbl->lock);
480 n = NULL;
481 if (!creat)
482 goto out;
484 ASSERT_RTNL();
486 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
487 if (!n)
488 goto out;
490 memcpy(n->key, pkey, key_len);
491 n->dev = dev;
492 if (dev)
493 dev_hold(dev);
495 if (tbl->pconstructor && tbl->pconstructor(n)) {
496 if (dev)
497 dev_put(dev);
498 kfree(n);
499 n = NULL;
500 goto out;
503 write_lock_bh(&tbl->lock);
504 n->next = tbl->phash_buckets[hash_val];
505 tbl->phash_buckets[hash_val] = n;
506 write_unlock_bh(&tbl->lock);
507 out:
508 return n;
512 int pneigh_delete(struct neigh_table *tbl, const void *pkey,
513 struct net_device *dev)
515 struct pneigh_entry *n, **np;
516 int key_len = tbl->key_len;
517 u32 hash_val = *(u32 *)(pkey + key_len - 4);
519 hash_val ^= (hash_val >> 16);
520 hash_val ^= hash_val >> 8;
521 hash_val ^= hash_val >> 4;
522 hash_val &= PNEIGH_HASHMASK;
524 write_lock_bh(&tbl->lock);
525 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
526 np = &n->next) {
527 if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
528 *np = n->next;
529 write_unlock_bh(&tbl->lock);
530 if (tbl->pdestructor)
531 tbl->pdestructor(n);
532 if (n->dev)
533 dev_put(n->dev);
534 kfree(n);
535 return 0;
538 write_unlock_bh(&tbl->lock);
539 return -ENOENT;
542 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
544 struct pneigh_entry *n, **np;
545 u32 h;
547 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
548 np = &tbl->phash_buckets[h];
549 while ((n = *np) != NULL) {
550 if (!dev || n->dev == dev) {
551 *np = n->next;
552 if (tbl->pdestructor)
553 tbl->pdestructor(n);
554 if (n->dev)
555 dev_put(n->dev);
556 kfree(n);
557 continue;
559 np = &n->next;
562 return -ENOENT;
567 * neighbour must already be out of the table;
570 void neigh_destroy(struct neighbour *neigh)
572 struct hh_cache *hh;
574 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
576 if (!neigh->dead) {
577 printk(KERN_WARNING
578 "Destroying alive neighbour %p\n", neigh);
579 dump_stack();
580 return;
583 if (neigh_del_timer(neigh))
584 printk(KERN_WARNING "Impossible event.\n");
586 while ((hh = neigh->hh) != NULL) {
587 neigh->hh = hh->hh_next;
588 hh->hh_next = NULL;
590 write_seqlock_bh(&hh->hh_lock);
591 hh->hh_output = neigh_blackhole;
592 write_sequnlock_bh(&hh->hh_lock);
593 if (atomic_dec_and_test(&hh->hh_refcnt))
594 kfree(hh);
597 skb_queue_purge(&neigh->arp_queue);
599 dev_put(neigh->dev);
600 neigh_parms_put(neigh->parms);
602 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
604 atomic_dec(&neigh->tbl->entries);
605 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
608 /* Neighbour state is suspicious;
609 disable fast path.
611 Called with write_locked neigh.
613 static void neigh_suspect(struct neighbour *neigh)
615 struct hh_cache *hh;
617 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
619 neigh->output = neigh->ops->output;
621 for (hh = neigh->hh; hh; hh = hh->hh_next)
622 hh->hh_output = neigh->ops->output;
625 /* Neighbour state is OK;
626 enable fast path.
628 Called with write_locked neigh.
630 static void neigh_connect(struct neighbour *neigh)
632 struct hh_cache *hh;
634 NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
636 neigh->output = neigh->ops->connected_output;
638 for (hh = neigh->hh; hh; hh = hh->hh_next)
639 hh->hh_output = neigh->ops->hh_output;
642 static void neigh_periodic_timer(unsigned long arg)
644 struct neigh_table *tbl = (struct neigh_table *)arg;
645 struct neighbour *n, **np;
646 unsigned long expire, now = jiffies;
648 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
650 write_lock(&tbl->lock);
653 * periodically recompute ReachableTime from random function
656 if (time_after(now, tbl->last_rand + 300 * HZ)) {
657 struct neigh_parms *p;
658 tbl->last_rand = now;
659 for (p = &tbl->parms; p; p = p->next)
660 p->reachable_time =
661 neigh_rand_reach_time(p->base_reachable_time);
664 np = &tbl->hash_buckets[tbl->hash_chain_gc];
665 tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
667 while ((n = *np) != NULL) {
668 unsigned int state;
670 write_lock(&n->lock);
672 state = n->nud_state;
673 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
674 write_unlock(&n->lock);
675 goto next_elt;
678 if (time_before(n->used, n->confirmed))
679 n->used = n->confirmed;
681 if (atomic_read(&n->refcnt) == 1 &&
682 (state == NUD_FAILED ||
683 time_after(now, n->used + n->parms->gc_staletime))) {
684 *np = n->next;
685 n->dead = 1;
686 write_unlock(&n->lock);
687 neigh_cleanup_and_release(n);
688 continue;
690 write_unlock(&n->lock);
692 next_elt:
693 np = &n->next;
696 /* Cycle through all hash buckets every base_reachable_time/2 ticks.
697 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
698 * base_reachable_time.
700 expire = tbl->parms.base_reachable_time >> 1;
701 expire /= (tbl->hash_mask + 1);
702 if (!expire)
703 expire = 1;
705 if (expire>HZ)
706 mod_timer(&tbl->gc_timer, round_jiffies(now + expire));
707 else
708 mod_timer(&tbl->gc_timer, now + expire);
710 write_unlock(&tbl->lock);
713 static __inline__ int neigh_max_probes(struct neighbour *n)
715 struct neigh_parms *p = n->parms;
716 return (n->nud_state & NUD_PROBE ?
717 p->ucast_probes :
718 p->ucast_probes + p->app_probes + p->mcast_probes);
721 static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
723 if (unlikely(mod_timer(&n->timer, when))) {
724 printk("NEIGH: BUG, double timer add, state is %x\n",
725 n->nud_state);
726 dump_stack();
730 /* Called when a timer expires for a neighbour entry. */
732 static void neigh_timer_handler(unsigned long arg)
734 unsigned long now, next;
735 struct neighbour *neigh = (struct neighbour *)arg;
736 unsigned state;
737 int notify = 0;
739 write_lock(&neigh->lock);
741 state = neigh->nud_state;
742 now = jiffies;
743 next = now + HZ;
745 if (!(state & NUD_IN_TIMER)) {
746 #ifndef CONFIG_SMP
747 printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
748 #endif
749 goto out;
752 if (state & NUD_REACHABLE) {
753 if (time_before_eq(now,
754 neigh->confirmed + neigh->parms->reachable_time)) {
755 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
756 next = neigh->confirmed + neigh->parms->reachable_time;
757 } else if (time_before_eq(now,
758 neigh->used + neigh->parms->delay_probe_time)) {
759 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
760 neigh->nud_state = NUD_DELAY;
761 neigh->updated = jiffies;
762 neigh_suspect(neigh);
763 next = now + neigh->parms->delay_probe_time;
764 } else {
765 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
766 neigh->nud_state = NUD_STALE;
767 neigh->updated = jiffies;
768 neigh_suspect(neigh);
769 notify = 1;
771 } else if (state & NUD_DELAY) {
772 if (time_before_eq(now,
773 neigh->confirmed + neigh->parms->delay_probe_time)) {
774 NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
775 neigh->nud_state = NUD_REACHABLE;
776 neigh->updated = jiffies;
777 neigh_connect(neigh);
778 notify = 1;
779 next = neigh->confirmed + neigh->parms->reachable_time;
780 } else {
781 NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
782 neigh->nud_state = NUD_PROBE;
783 neigh->updated = jiffies;
784 atomic_set(&neigh->probes, 0);
785 next = now + neigh->parms->retrans_time;
787 } else {
788 /* NUD_PROBE|NUD_INCOMPLETE */
789 next = now + neigh->parms->retrans_time;
792 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
793 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
794 struct sk_buff *skb;
796 neigh->nud_state = NUD_FAILED;
797 neigh->updated = jiffies;
798 notify = 1;
799 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
800 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
802 /* It is very thin place. report_unreachable is very complicated
803 routine. Particularly, it can hit the same neighbour entry!
805 So that, we try to be accurate and avoid dead loop. --ANK
807 while (neigh->nud_state == NUD_FAILED &&
808 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
809 write_unlock(&neigh->lock);
810 neigh->ops->error_report(neigh, skb);
811 write_lock(&neigh->lock);
813 skb_queue_purge(&neigh->arp_queue);
816 if (neigh->nud_state & NUD_IN_TIMER) {
817 if (time_before(next, jiffies + HZ/2))
818 next = jiffies + HZ/2;
819 if (!mod_timer(&neigh->timer, next))
820 neigh_hold(neigh);
822 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
823 struct sk_buff *skb = skb_peek(&neigh->arp_queue);
824 /* keep skb alive even if arp_queue overflows */
825 if (skb)
826 skb_get(skb);
827 write_unlock(&neigh->lock);
828 neigh->ops->solicit(neigh, skb);
829 atomic_inc(&neigh->probes);
830 if (skb)
831 kfree_skb(skb);
832 } else {
833 out:
834 write_unlock(&neigh->lock);
837 if (notify)
838 neigh_update_notify(neigh);
840 neigh_release(neigh);
843 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
845 int rc;
846 unsigned long now;
848 write_lock_bh(&neigh->lock);
850 rc = 0;
851 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
852 goto out_unlock_bh;
854 now = jiffies;
856 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
857 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
858 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
859 neigh->nud_state = NUD_INCOMPLETE;
860 neigh->updated = jiffies;
861 neigh_hold(neigh);
862 neigh_add_timer(neigh, now + 1);
863 } else {
864 neigh->nud_state = NUD_FAILED;
865 neigh->updated = jiffies;
866 write_unlock_bh(&neigh->lock);
868 if (skb)
869 kfree_skb(skb);
870 return 1;
872 } else if (neigh->nud_state & NUD_STALE) {
873 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
874 neigh_hold(neigh);
875 neigh->nud_state = NUD_DELAY;
876 neigh->updated = jiffies;
877 neigh_add_timer(neigh,
878 jiffies + neigh->parms->delay_probe_time);
881 if (neigh->nud_state == NUD_INCOMPLETE) {
882 if (skb) {
883 if (skb_queue_len(&neigh->arp_queue) >=
884 neigh->parms->queue_len) {
885 struct sk_buff *buff;
886 buff = neigh->arp_queue.next;
887 __skb_unlink(buff, &neigh->arp_queue);
888 kfree_skb(buff);
890 __skb_queue_tail(&neigh->arp_queue, skb);
892 rc = 1;
894 out_unlock_bh:
895 write_unlock_bh(&neigh->lock);
896 return rc;
899 static void neigh_update_hhs(struct neighbour *neigh)
901 struct hh_cache *hh;
902 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
903 = neigh->dev->header_ops->cache_update;
905 if (update) {
906 for (hh = neigh->hh; hh; hh = hh->hh_next) {
907 write_seqlock_bh(&hh->hh_lock);
908 update(hh, neigh->dev, neigh->ha);
909 write_sequnlock_bh(&hh->hh_lock);
916 /* Generic update routine.
917 -- lladdr is new lladdr or NULL, if it is not supplied.
918 -- new is new state.
919 -- flags
920 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
921 if it is different.
922 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
923 lladdr instead of overriding it
924 if it is different.
925 It also allows to retain current state
926 if lladdr is unchanged.
927 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
929 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
930 NTF_ROUTER flag.
931 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
932 a router.
934 Caller MUST hold reference count on the entry.
937 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
938 u32 flags)
940 u8 old;
941 int err;
942 int notify = 0;
943 struct net_device *dev;
944 int update_isrouter = 0;
946 write_lock_bh(&neigh->lock);
948 dev = neigh->dev;
949 old = neigh->nud_state;
950 err = -EPERM;
952 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
953 (old & (NUD_NOARP | NUD_PERMANENT)))
954 goto out;
956 if (!(new & NUD_VALID)) {
957 neigh_del_timer(neigh);
958 if (old & NUD_CONNECTED)
959 neigh_suspect(neigh);
960 neigh->nud_state = new;
961 err = 0;
962 notify = old & NUD_VALID;
963 goto out;
966 /* Compare new lladdr with cached one */
967 if (!dev->addr_len) {
968 /* First case: device needs no address. */
969 lladdr = neigh->ha;
970 } else if (lladdr) {
971 /* The second case: if something is already cached
972 and a new address is proposed:
973 - compare new & old
974 - if they are different, check override flag
976 if ((old & NUD_VALID) &&
977 !memcmp(lladdr, neigh->ha, dev->addr_len))
978 lladdr = neigh->ha;
979 } else {
980 /* No address is supplied; if we know something,
981 use it, otherwise discard the request.
983 err = -EINVAL;
984 if (!(old & NUD_VALID))
985 goto out;
986 lladdr = neigh->ha;
989 if (new & NUD_CONNECTED)
990 neigh->confirmed = jiffies;
991 neigh->updated = jiffies;
993 /* If entry was valid and address is not changed,
994 do not change entry state, if new one is STALE.
996 err = 0;
997 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
998 if (old & NUD_VALID) {
999 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1000 update_isrouter = 0;
1001 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1002 (old & NUD_CONNECTED)) {
1003 lladdr = neigh->ha;
1004 new = NUD_STALE;
1005 } else
1006 goto out;
1007 } else {
1008 if (lladdr == neigh->ha && new == NUD_STALE &&
1009 ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1010 (old & NUD_CONNECTED))
1012 new = old;
1016 if (new != old) {
1017 neigh_del_timer(neigh);
1018 if (new & NUD_IN_TIMER) {
1019 neigh_hold(neigh);
1020 neigh_add_timer(neigh, (jiffies +
1021 ((new & NUD_REACHABLE) ?
1022 neigh->parms->reachable_time :
1023 0)));
1025 neigh->nud_state = new;
1028 if (lladdr != neigh->ha) {
1029 memcpy(&neigh->ha, lladdr, dev->addr_len);
1030 neigh_update_hhs(neigh);
1031 if (!(new & NUD_CONNECTED))
1032 neigh->confirmed = jiffies -
1033 (neigh->parms->base_reachable_time << 1);
1034 notify = 1;
1036 if (new == old)
1037 goto out;
1038 if (new & NUD_CONNECTED)
1039 neigh_connect(neigh);
1040 else
1041 neigh_suspect(neigh);
1042 if (!(old & NUD_VALID)) {
1043 struct sk_buff *skb;
1045 /* Again: avoid dead loop if something went wrong */
1047 while (neigh->nud_state & NUD_VALID &&
1048 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1049 struct neighbour *n1 = neigh;
1050 write_unlock_bh(&neigh->lock);
1051 /* On shaper/eql skb->dst->neighbour != neigh :( */
1052 if (skb->dst && skb->dst->neighbour)
1053 n1 = skb->dst->neighbour;
1054 n1->output(skb);
1055 write_lock_bh(&neigh->lock);
1057 skb_queue_purge(&neigh->arp_queue);
1059 out:
1060 if (update_isrouter) {
1061 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1062 (neigh->flags | NTF_ROUTER) :
1063 (neigh->flags & ~NTF_ROUTER);
1065 write_unlock_bh(&neigh->lock);
1067 if (notify)
1068 neigh_update_notify(neigh);
1070 return err;
1073 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1074 u8 *lladdr, void *saddr,
1075 struct net_device *dev)
1077 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1078 lladdr || !dev->addr_len);
1079 if (neigh)
1080 neigh_update(neigh, lladdr, NUD_STALE,
1081 NEIGH_UPDATE_F_OVERRIDE);
1082 return neigh;
1085 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1086 __be16 protocol)
1088 struct hh_cache *hh;
1089 struct net_device *dev = dst->dev;
1091 for (hh = n->hh; hh; hh = hh->hh_next)
1092 if (hh->hh_type == protocol)
1093 break;
1095 if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
1096 seqlock_init(&hh->hh_lock);
1097 hh->hh_type = protocol;
1098 atomic_set(&hh->hh_refcnt, 0);
1099 hh->hh_next = NULL;
1101 if (dev->header_ops->cache(n, hh)) {
1102 kfree(hh);
1103 hh = NULL;
1104 } else {
1105 atomic_inc(&hh->hh_refcnt);
1106 hh->hh_next = n->hh;
1107 n->hh = hh;
1108 if (n->nud_state & NUD_CONNECTED)
1109 hh->hh_output = n->ops->hh_output;
1110 else
1111 hh->hh_output = n->ops->output;
1114 if (hh) {
1115 atomic_inc(&hh->hh_refcnt);
1116 dst->hh = hh;
1120 /* This function can be used in contexts, where only old dev_queue_xmit
1121 worked, f.e. if you want to override normal output path (eql, shaper),
1122 but resolution is not made yet.
1125 int neigh_compat_output(struct sk_buff *skb)
1127 struct net_device *dev = skb->dev;
1129 __skb_pull(skb, skb_network_offset(skb));
1131 if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1132 skb->len) < 0 &&
1133 dev->header_ops->rebuild(skb))
1134 return 0;
1136 return dev_queue_xmit(skb);
1139 /* Slow and careful. */
1141 int neigh_resolve_output(struct sk_buff *skb)
1143 struct dst_entry *dst = skb->dst;
1144 struct neighbour *neigh;
1145 int rc = 0;
1147 if (!dst || !(neigh = dst->neighbour))
1148 goto discard;
1150 __skb_pull(skb, skb_network_offset(skb));
1152 if (!neigh_event_send(neigh, skb)) {
1153 int err;
1154 struct net_device *dev = neigh->dev;
1155 if (dev->header_ops->cache && !dst->hh) {
1156 write_lock_bh(&neigh->lock);
1157 if (!dst->hh)
1158 neigh_hh_init(neigh, dst, dst->ops->protocol);
1159 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1160 neigh->ha, NULL, skb->len);
1161 write_unlock_bh(&neigh->lock);
1162 } else {
1163 read_lock_bh(&neigh->lock);
1164 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1165 neigh->ha, NULL, skb->len);
1166 read_unlock_bh(&neigh->lock);
1168 if (err >= 0)
1169 rc = neigh->ops->queue_xmit(skb);
1170 else
1171 goto out_kfree_skb;
1173 out:
1174 return rc;
1175 discard:
1176 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1177 dst, dst ? dst->neighbour : NULL);
1178 out_kfree_skb:
1179 rc = -EINVAL;
1180 kfree_skb(skb);
1181 goto out;
1184 /* As fast as possible without hh cache */
1186 int neigh_connected_output(struct sk_buff *skb)
1188 int err;
1189 struct dst_entry *dst = skb->dst;
1190 struct neighbour *neigh = dst->neighbour;
1191 struct net_device *dev = neigh->dev;
1193 __skb_pull(skb, skb_network_offset(skb));
1195 read_lock_bh(&neigh->lock);
1196 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1197 neigh->ha, NULL, skb->len);
1198 read_unlock_bh(&neigh->lock);
1199 if (err >= 0)
1200 err = neigh->ops->queue_xmit(skb);
1201 else {
1202 err = -EINVAL;
1203 kfree_skb(skb);
1205 return err;
1208 static void neigh_proxy_process(unsigned long arg)
1210 struct neigh_table *tbl = (struct neigh_table *)arg;
1211 long sched_next = 0;
1212 unsigned long now = jiffies;
1213 struct sk_buff *skb;
1215 spin_lock(&tbl->proxy_queue.lock);
1217 skb = tbl->proxy_queue.next;
1219 while (skb != (struct sk_buff *)&tbl->proxy_queue) {
1220 struct sk_buff *back = skb;
1221 long tdif = NEIGH_CB(back)->sched_next - now;
1223 skb = skb->next;
1224 if (tdif <= 0) {
1225 struct net_device *dev = back->dev;
1226 __skb_unlink(back, &tbl->proxy_queue);
1227 if (tbl->proxy_redo && netif_running(dev))
1228 tbl->proxy_redo(back);
1229 else
1230 kfree_skb(back);
1232 dev_put(dev);
1233 } else if (!sched_next || tdif < sched_next)
1234 sched_next = tdif;
1236 del_timer(&tbl->proxy_timer);
1237 if (sched_next)
1238 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1239 spin_unlock(&tbl->proxy_queue.lock);
1242 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1243 struct sk_buff *skb)
1245 unsigned long now = jiffies;
1246 unsigned long sched_next = now + (net_random() % p->proxy_delay);
1248 if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1249 kfree_skb(skb);
1250 return;
1253 NEIGH_CB(skb)->sched_next = sched_next;
1254 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1256 spin_lock(&tbl->proxy_queue.lock);
1257 if (del_timer(&tbl->proxy_timer)) {
1258 if (time_before(tbl->proxy_timer.expires, sched_next))
1259 sched_next = tbl->proxy_timer.expires;
1261 dst_release(skb->dst);
1262 skb->dst = NULL;
1263 dev_hold(skb->dev);
1264 __skb_queue_tail(&tbl->proxy_queue, skb);
1265 mod_timer(&tbl->proxy_timer, sched_next);
1266 spin_unlock(&tbl->proxy_queue.lock);
1270 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1271 struct neigh_table *tbl)
1273 struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1275 if (p) {
1276 p->tbl = tbl;
1277 atomic_set(&p->refcnt, 1);
1278 INIT_RCU_HEAD(&p->rcu_head);
1279 p->reachable_time =
1280 neigh_rand_reach_time(p->base_reachable_time);
1281 if (dev) {
1282 if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
1283 kfree(p);
1284 return NULL;
1287 dev_hold(dev);
1288 p->dev = dev;
1290 p->sysctl_table = NULL;
1291 write_lock_bh(&tbl->lock);
1292 p->next = tbl->parms.next;
1293 tbl->parms.next = p;
1294 write_unlock_bh(&tbl->lock);
1296 return p;
1299 static void neigh_rcu_free_parms(struct rcu_head *head)
1301 struct neigh_parms *parms =
1302 container_of(head, struct neigh_parms, rcu_head);
1304 neigh_parms_put(parms);
1307 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1309 struct neigh_parms **p;
1311 if (!parms || parms == &tbl->parms)
1312 return;
1313 write_lock_bh(&tbl->lock);
1314 for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1315 if (*p == parms) {
1316 *p = parms->next;
1317 parms->dead = 1;
1318 write_unlock_bh(&tbl->lock);
1319 if (parms->dev)
1320 dev_put(parms->dev);
1321 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1322 return;
1325 write_unlock_bh(&tbl->lock);
1326 NEIGH_PRINTK1("neigh_parms_release: not found\n");
/* Free the memory backing a parameter set. */
void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1334 static struct lock_class_key neigh_table_proxy_queue_class;
1336 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1338 unsigned long now = jiffies;
1339 unsigned long phsize;
1341 atomic_set(&tbl->parms.refcnt, 1);
1342 INIT_RCU_HEAD(&tbl->parms.rcu_head);
1343 tbl->parms.reachable_time =
1344 neigh_rand_reach_time(tbl->parms.base_reachable_time);
1346 if (!tbl->kmem_cachep)
1347 tbl->kmem_cachep =
1348 kmem_cache_create(tbl->id, tbl->entry_size, 0,
1349 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1350 NULL);
1351 tbl->stats = alloc_percpu(struct neigh_statistics);
1352 if (!tbl->stats)
1353 panic("cannot create neighbour cache statistics");
1355 #ifdef CONFIG_PROC_FS
1356 tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat);
1357 if (!tbl->pde)
1358 panic("cannot create neighbour proc dir entry");
1359 tbl->pde->proc_fops = &neigh_stat_seq_fops;
1360 tbl->pde->data = tbl;
1361 #endif
1363 tbl->hash_mask = 1;
1364 tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
1366 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1367 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1369 if (!tbl->hash_buckets || !tbl->phash_buckets)
1370 panic("cannot allocate neighbour cache hashes");
1372 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1374 rwlock_init(&tbl->lock);
1375 init_timer(&tbl->gc_timer);
1376 tbl->gc_timer.data = (unsigned long)tbl;
1377 tbl->gc_timer.function = neigh_periodic_timer;
1378 tbl->gc_timer.expires = now + 1;
1379 add_timer(&tbl->gc_timer);
1381 init_timer(&tbl->proxy_timer);
1382 tbl->proxy_timer.data = (unsigned long)tbl;
1383 tbl->proxy_timer.function = neigh_proxy_process;
1384 skb_queue_head_init_class(&tbl->proxy_queue,
1385 &neigh_table_proxy_queue_class);
1387 tbl->last_flush = now;
1388 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1391 void neigh_table_init(struct neigh_table *tbl)
1393 struct neigh_table *tmp;
1395 neigh_table_init_no_netlink(tbl);
1396 write_lock(&neigh_tbl_lock);
1397 for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1398 if (tmp->family == tbl->family)
1399 break;
1401 tbl->next = neigh_tables;
1402 neigh_tables = tbl;
1403 write_unlock(&neigh_tbl_lock);
1405 if (unlikely(tmp)) {
1406 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1407 "family %d\n", tbl->family);
1408 dump_stack();
1412 int neigh_table_clear(struct neigh_table *tbl)
1414 struct neigh_table **tp;
1416 /* It is not clean... Fix it to unload IPv6 module safely */
1417 del_timer_sync(&tbl->gc_timer);
1418 del_timer_sync(&tbl->proxy_timer);
1419 pneigh_queue_purge(&tbl->proxy_queue);
1420 neigh_ifdown(tbl, NULL);
1421 if (atomic_read(&tbl->entries))
1422 printk(KERN_CRIT "neighbour leakage\n");
1423 write_lock(&neigh_tbl_lock);
1424 for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1425 if (*tp == tbl) {
1426 *tp = tbl->next;
1427 break;
1430 write_unlock(&neigh_tbl_lock);
1432 neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
1433 tbl->hash_buckets = NULL;
1435 kfree(tbl->phash_buckets);
1436 tbl->phash_buckets = NULL;
1438 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1440 free_percpu(tbl->stats);
1441 tbl->stats = NULL;
1443 kmem_cache_destroy(tbl->kmem_cachep);
1444 tbl->kmem_cachep = NULL;
1446 return 0;
1449 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1451 struct net *net = skb->sk->sk_net;
1452 struct ndmsg *ndm;
1453 struct nlattr *dst_attr;
1454 struct neigh_table *tbl;
1455 struct net_device *dev = NULL;
1456 int err = -EINVAL;
1458 if (nlmsg_len(nlh) < sizeof(*ndm))
1459 goto out;
1461 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1462 if (dst_attr == NULL)
1463 goto out;
1465 ndm = nlmsg_data(nlh);
1466 if (ndm->ndm_ifindex) {
1467 dev = dev_get_by_index(net, ndm->ndm_ifindex);
1468 if (dev == NULL) {
1469 err = -ENODEV;
1470 goto out;
1474 read_lock(&neigh_tbl_lock);
1475 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1476 struct neighbour *neigh;
1478 if (tbl->family != ndm->ndm_family)
1479 continue;
1480 read_unlock(&neigh_tbl_lock);
1482 if (nla_len(dst_attr) < tbl->key_len)
1483 goto out_dev_put;
1485 if (ndm->ndm_flags & NTF_PROXY) {
1486 err = pneigh_delete(tbl, nla_data(dst_attr), dev);
1487 goto out_dev_put;
1490 if (dev == NULL)
1491 goto out_dev_put;
1493 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1494 if (neigh == NULL) {
1495 err = -ENOENT;
1496 goto out_dev_put;
1499 err = neigh_update(neigh, NULL, NUD_FAILED,
1500 NEIGH_UPDATE_F_OVERRIDE |
1501 NEIGH_UPDATE_F_ADMIN);
1502 neigh_release(neigh);
1503 goto out_dev_put;
1505 read_unlock(&neigh_tbl_lock);
1506 err = -EAFNOSUPPORT;
1508 out_dev_put:
1509 if (dev)
1510 dev_put(dev);
1511 out:
1512 return err;
1515 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1517 struct net *net = skb->sk->sk_net;
1518 struct ndmsg *ndm;
1519 struct nlattr *tb[NDA_MAX+1];
1520 struct neigh_table *tbl;
1521 struct net_device *dev = NULL;
1522 int err;
1524 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1525 if (err < 0)
1526 goto out;
1528 err = -EINVAL;
1529 if (tb[NDA_DST] == NULL)
1530 goto out;
1532 ndm = nlmsg_data(nlh);
1533 if (ndm->ndm_ifindex) {
1534 dev = dev_get_by_index(net, ndm->ndm_ifindex);
1535 if (dev == NULL) {
1536 err = -ENODEV;
1537 goto out;
1540 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1541 goto out_dev_put;
1544 read_lock(&neigh_tbl_lock);
1545 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1546 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1547 struct neighbour *neigh;
1548 void *dst, *lladdr;
1550 if (tbl->family != ndm->ndm_family)
1551 continue;
1552 read_unlock(&neigh_tbl_lock);
1554 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1555 goto out_dev_put;
1556 dst = nla_data(tb[NDA_DST]);
1557 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1559 if (ndm->ndm_flags & NTF_PROXY) {
1560 struct pneigh_entry *pn;
1562 err = -ENOBUFS;
1563 pn = pneigh_lookup(tbl, dst, dev, 1);
1564 if (pn) {
1565 pn->flags = ndm->ndm_flags;
1566 err = 0;
1568 goto out_dev_put;
1571 if (dev == NULL)
1572 goto out_dev_put;
1574 neigh = neigh_lookup(tbl, dst, dev);
1575 if (neigh == NULL) {
1576 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1577 err = -ENOENT;
1578 goto out_dev_put;
1581 neigh = __neigh_lookup_errno(tbl, dst, dev);
1582 if (IS_ERR(neigh)) {
1583 err = PTR_ERR(neigh);
1584 goto out_dev_put;
1586 } else {
1587 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1588 err = -EEXIST;
1589 neigh_release(neigh);
1590 goto out_dev_put;
1593 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1594 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1597 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1598 neigh_release(neigh);
1599 goto out_dev_put;
1602 read_unlock(&neigh_tbl_lock);
1603 err = -EAFNOSUPPORT;
1605 out_dev_put:
1606 if (dev)
1607 dev_put(dev);
1608 out:
1609 return err;
1612 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1614 struct nlattr *nest;
1616 nest = nla_nest_start(skb, NDTA_PARMS);
1617 if (nest == NULL)
1618 return -ENOBUFS;
1620 if (parms->dev)
1621 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1623 NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1624 NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1625 NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1626 NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1627 NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1628 NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1629 NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1630 NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1631 parms->base_reachable_time);
1632 NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1633 NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1634 NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1635 NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1636 NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1637 NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1639 return nla_nest_end(skb, nest);
1641 nla_put_failure:
1642 return nla_nest_cancel(skb, nest);
1645 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1646 u32 pid, u32 seq, int type, int flags)
1648 struct nlmsghdr *nlh;
1649 struct ndtmsg *ndtmsg;
1651 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1652 if (nlh == NULL)
1653 return -EMSGSIZE;
1655 ndtmsg = nlmsg_data(nlh);
1657 read_lock_bh(&tbl->lock);
1658 ndtmsg->ndtm_family = tbl->family;
1659 ndtmsg->ndtm_pad1 = 0;
1660 ndtmsg->ndtm_pad2 = 0;
1662 NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1663 NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1664 NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1665 NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1666 NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1669 unsigned long now = jiffies;
1670 unsigned int flush_delta = now - tbl->last_flush;
1671 unsigned int rand_delta = now - tbl->last_rand;
1673 struct ndt_config ndc = {
1674 .ndtc_key_len = tbl->key_len,
1675 .ndtc_entry_size = tbl->entry_size,
1676 .ndtc_entries = atomic_read(&tbl->entries),
1677 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1678 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1679 .ndtc_hash_rnd = tbl->hash_rnd,
1680 .ndtc_hash_mask = tbl->hash_mask,
1681 .ndtc_hash_chain_gc = tbl->hash_chain_gc,
1682 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1685 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1689 int cpu;
1690 struct ndt_stats ndst;
1692 memset(&ndst, 0, sizeof(ndst));
1694 for_each_possible_cpu(cpu) {
1695 struct neigh_statistics *st;
1697 st = per_cpu_ptr(tbl->stats, cpu);
1698 ndst.ndts_allocs += st->allocs;
1699 ndst.ndts_destroys += st->destroys;
1700 ndst.ndts_hash_grows += st->hash_grows;
1701 ndst.ndts_res_failed += st->res_failed;
1702 ndst.ndts_lookups += st->lookups;
1703 ndst.ndts_hits += st->hits;
1704 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
1705 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
1706 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
1707 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1710 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1713 BUG_ON(tbl->parms.dev);
1714 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1715 goto nla_put_failure;
1717 read_unlock_bh(&tbl->lock);
1718 return nlmsg_end(skb, nlh);
1720 nla_put_failure:
1721 read_unlock_bh(&tbl->lock);
1722 nlmsg_cancel(skb, nlh);
1723 return -EMSGSIZE;
1726 static int neightbl_fill_param_info(struct sk_buff *skb,
1727 struct neigh_table *tbl,
1728 struct neigh_parms *parms,
1729 u32 pid, u32 seq, int type,
1730 unsigned int flags)
1732 struct ndtmsg *ndtmsg;
1733 struct nlmsghdr *nlh;
1735 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1736 if (nlh == NULL)
1737 return -EMSGSIZE;
1739 ndtmsg = nlmsg_data(nlh);
1741 read_lock_bh(&tbl->lock);
1742 ndtmsg->ndtm_family = tbl->family;
1743 ndtmsg->ndtm_pad1 = 0;
1744 ndtmsg->ndtm_pad2 = 0;
1746 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1747 neightbl_fill_parms(skb, parms) < 0)
1748 goto errout;
1750 read_unlock_bh(&tbl->lock);
1751 return nlmsg_end(skb, nlh);
1752 errout:
1753 read_unlock_bh(&tbl->lock);
1754 nlmsg_cancel(skb, nlh);
1755 return -EMSGSIZE;
1758 static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1759 int ifindex)
1761 struct neigh_parms *p;
1763 for (p = &tbl->parms; p; p = p->next)
1764 if ((p->dev && p->dev->ifindex == ifindex) ||
1765 (!p->dev && !ifindex))
1766 return p;
1768 return NULL;
1771 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1772 [NDTA_NAME] = { .type = NLA_STRING },
1773 [NDTA_THRESH1] = { .type = NLA_U32 },
1774 [NDTA_THRESH2] = { .type = NLA_U32 },
1775 [NDTA_THRESH3] = { .type = NLA_U32 },
1776 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1777 [NDTA_PARMS] = { .type = NLA_NESTED },
1780 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1781 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1782 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1783 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1784 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1785 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1786 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1787 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1788 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1789 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1790 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1791 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1792 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1793 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1796 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1798 struct neigh_table *tbl;
1799 struct ndtmsg *ndtmsg;
1800 struct nlattr *tb[NDTA_MAX+1];
1801 int err;
1803 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1804 nl_neightbl_policy);
1805 if (err < 0)
1806 goto errout;
1808 if (tb[NDTA_NAME] == NULL) {
1809 err = -EINVAL;
1810 goto errout;
1813 ndtmsg = nlmsg_data(nlh);
1814 read_lock(&neigh_tbl_lock);
1815 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1816 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1817 continue;
1819 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1820 break;
1823 if (tbl == NULL) {
1824 err = -ENOENT;
1825 goto errout_locked;
1829 * We acquire tbl->lock to be nice to the periodic timers and
1830 * make sure they always see a consistent set of values.
1832 write_lock_bh(&tbl->lock);
1834 if (tb[NDTA_PARMS]) {
1835 struct nlattr *tbp[NDTPA_MAX+1];
1836 struct neigh_parms *p;
1837 int i, ifindex = 0;
1839 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1840 nl_ntbl_parm_policy);
1841 if (err < 0)
1842 goto errout_tbl_lock;
1844 if (tbp[NDTPA_IFINDEX])
1845 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1847 p = lookup_neigh_params(tbl, ifindex);
1848 if (p == NULL) {
1849 err = -ENOENT;
1850 goto errout_tbl_lock;
1853 for (i = 1; i <= NDTPA_MAX; i++) {
1854 if (tbp[i] == NULL)
1855 continue;
1857 switch (i) {
1858 case NDTPA_QUEUE_LEN:
1859 p->queue_len = nla_get_u32(tbp[i]);
1860 break;
1861 case NDTPA_PROXY_QLEN:
1862 p->proxy_qlen = nla_get_u32(tbp[i]);
1863 break;
1864 case NDTPA_APP_PROBES:
1865 p->app_probes = nla_get_u32(tbp[i]);
1866 break;
1867 case NDTPA_UCAST_PROBES:
1868 p->ucast_probes = nla_get_u32(tbp[i]);
1869 break;
1870 case NDTPA_MCAST_PROBES:
1871 p->mcast_probes = nla_get_u32(tbp[i]);
1872 break;
1873 case NDTPA_BASE_REACHABLE_TIME:
1874 p->base_reachable_time = nla_get_msecs(tbp[i]);
1875 break;
1876 case NDTPA_GC_STALETIME:
1877 p->gc_staletime = nla_get_msecs(tbp[i]);
1878 break;
1879 case NDTPA_DELAY_PROBE_TIME:
1880 p->delay_probe_time = nla_get_msecs(tbp[i]);
1881 break;
1882 case NDTPA_RETRANS_TIME:
1883 p->retrans_time = nla_get_msecs(tbp[i]);
1884 break;
1885 case NDTPA_ANYCAST_DELAY:
1886 p->anycast_delay = nla_get_msecs(tbp[i]);
1887 break;
1888 case NDTPA_PROXY_DELAY:
1889 p->proxy_delay = nla_get_msecs(tbp[i]);
1890 break;
1891 case NDTPA_LOCKTIME:
1892 p->locktime = nla_get_msecs(tbp[i]);
1893 break;
1898 if (tb[NDTA_THRESH1])
1899 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
1901 if (tb[NDTA_THRESH2])
1902 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
1904 if (tb[NDTA_THRESH3])
1905 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
1907 if (tb[NDTA_GC_INTERVAL])
1908 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
1910 err = 0;
1912 errout_tbl_lock:
1913 write_unlock_bh(&tbl->lock);
1914 errout_locked:
1915 read_unlock(&neigh_tbl_lock);
1916 errout:
1917 return err;
1920 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1922 int family, tidx, nidx = 0;
1923 int tbl_skip = cb->args[0];
1924 int neigh_skip = cb->args[1];
1925 struct neigh_table *tbl;
1927 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
1929 read_lock(&neigh_tbl_lock);
1930 for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
1931 struct neigh_parms *p;
1933 if (tidx < tbl_skip || (family && tbl->family != family))
1934 continue;
1936 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
1937 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
1938 NLM_F_MULTI) <= 0)
1939 break;
1941 for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
1942 if (nidx < neigh_skip)
1943 continue;
1945 if (neightbl_fill_param_info(skb, tbl, p,
1946 NETLINK_CB(cb->skb).pid,
1947 cb->nlh->nlmsg_seq,
1948 RTM_NEWNEIGHTBL,
1949 NLM_F_MULTI) <= 0)
1950 goto out;
1953 neigh_skip = 0;
1955 out:
1956 read_unlock(&neigh_tbl_lock);
1957 cb->args[0] = tidx;
1958 cb->args[1] = nidx;
1960 return skb->len;
1963 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
1964 u32 pid, u32 seq, int type, unsigned int flags)
1966 unsigned long now = jiffies;
1967 struct nda_cacheinfo ci;
1968 struct nlmsghdr *nlh;
1969 struct ndmsg *ndm;
1971 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
1972 if (nlh == NULL)
1973 return -EMSGSIZE;
1975 ndm = nlmsg_data(nlh);
1976 ndm->ndm_family = neigh->ops->family;
1977 ndm->ndm_pad1 = 0;
1978 ndm->ndm_pad2 = 0;
1979 ndm->ndm_flags = neigh->flags;
1980 ndm->ndm_type = neigh->type;
1981 ndm->ndm_ifindex = neigh->dev->ifindex;
1983 NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
1985 read_lock_bh(&neigh->lock);
1986 ndm->ndm_state = neigh->nud_state;
1987 if ((neigh->nud_state & NUD_VALID) &&
1988 nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
1989 read_unlock_bh(&neigh->lock);
1990 goto nla_put_failure;
1993 ci.ndm_used = now - neigh->used;
1994 ci.ndm_confirmed = now - neigh->confirmed;
1995 ci.ndm_updated = now - neigh->updated;
1996 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
1997 read_unlock_bh(&neigh->lock);
1999 NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2000 NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2002 return nlmsg_end(skb, nlh);
2004 nla_put_failure:
2005 nlmsg_cancel(skb, nlh);
2006 return -EMSGSIZE;
2009 static void neigh_update_notify(struct neighbour *neigh)
2011 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2012 __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2015 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2016 struct netlink_callback *cb)
2018 struct neighbour *n;
2019 int rc, h, s_h = cb->args[1];
2020 int idx, s_idx = idx = cb->args[2];
2022 read_lock_bh(&tbl->lock);
2023 for (h = 0; h <= tbl->hash_mask; h++) {
2024 if (h < s_h)
2025 continue;
2026 if (h > s_h)
2027 s_idx = 0;
2028 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
2029 if (idx < s_idx)
2030 continue;
2031 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2032 cb->nlh->nlmsg_seq,
2033 RTM_NEWNEIGH,
2034 NLM_F_MULTI) <= 0) {
2035 read_unlock_bh(&tbl->lock);
2036 rc = -1;
2037 goto out;
2041 read_unlock_bh(&tbl->lock);
2042 rc = skb->len;
2043 out:
2044 cb->args[1] = h;
2045 cb->args[2] = idx;
2046 return rc;
2049 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2051 struct neigh_table *tbl;
2052 int t, family, s_t;
2054 read_lock(&neigh_tbl_lock);
2055 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2056 s_t = cb->args[0];
2058 for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2059 if (t < s_t || (family && tbl->family != family))
2060 continue;
2061 if (t > s_t)
2062 memset(&cb->args[1], 0, sizeof(cb->args) -
2063 sizeof(cb->args[0]));
2064 if (neigh_dump_table(tbl, skb, cb) < 0)
2065 break;
2067 read_unlock(&neigh_tbl_lock);
2069 cb->args[0] = t;
2070 return skb->len;
2073 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2075 int chain;
2077 read_lock_bh(&tbl->lock);
2078 for (chain = 0; chain <= tbl->hash_mask; chain++) {
2079 struct neighbour *n;
2081 for (n = tbl->hash_buckets[chain]; n; n = n->next)
2082 cb(n, cookie);
2084 read_unlock_bh(&tbl->lock);
2086 EXPORT_SYMBOL(neigh_for_each);
2088 /* The tbl->lock must be held as a writer and BH disabled. */
2089 void __neigh_for_each_release(struct neigh_table *tbl,
2090 int (*cb)(struct neighbour *))
2092 int chain;
2094 for (chain = 0; chain <= tbl->hash_mask; chain++) {
2095 struct neighbour *n, **np;
2097 np = &tbl->hash_buckets[chain];
2098 while ((n = *np) != NULL) {
2099 int release;
2101 write_lock(&n->lock);
2102 release = cb(n);
2103 if (release) {
2104 *np = n->next;
2105 n->dead = 1;
2106 } else
2107 np = &n->next;
2108 write_unlock(&n->lock);
2109 if (release)
2110 neigh_cleanup_and_release(n);
2114 EXPORT_SYMBOL(__neigh_for_each_release);
2116 #ifdef CONFIG_PROC_FS
2118 static struct neighbour *neigh_get_first(struct seq_file *seq)
2120 struct neigh_seq_state *state = seq->private;
2121 struct neigh_table *tbl = state->tbl;
2122 struct neighbour *n = NULL;
2123 int bucket = state->bucket;
2125 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2126 for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
2127 n = tbl->hash_buckets[bucket];
2129 while (n) {
2130 if (state->neigh_sub_iter) {
2131 loff_t fakep = 0;
2132 void *v;
2134 v = state->neigh_sub_iter(state, n, &fakep);
2135 if (!v)
2136 goto next;
2138 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2139 break;
2140 if (n->nud_state & ~NUD_NOARP)
2141 break;
2142 next:
2143 n = n->next;
2146 if (n)
2147 break;
2149 state->bucket = bucket;
2151 return n;
2154 static struct neighbour *neigh_get_next(struct seq_file *seq,
2155 struct neighbour *n,
2156 loff_t *pos)
2158 struct neigh_seq_state *state = seq->private;
2159 struct neigh_table *tbl = state->tbl;
2161 if (state->neigh_sub_iter) {
2162 void *v = state->neigh_sub_iter(state, n, pos);
2163 if (v)
2164 return n;
2166 n = n->next;
2168 while (1) {
2169 while (n) {
2170 if (state->neigh_sub_iter) {
2171 void *v = state->neigh_sub_iter(state, n, pos);
2172 if (v)
2173 return n;
2174 goto next;
2176 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2177 break;
2179 if (n->nud_state & ~NUD_NOARP)
2180 break;
2181 next:
2182 n = n->next;
2185 if (n)
2186 break;
2188 if (++state->bucket > tbl->hash_mask)
2189 break;
2191 n = tbl->hash_buckets[state->bucket];
2194 if (n && pos)
2195 --(*pos);
2196 return n;
2199 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2201 struct neighbour *n = neigh_get_first(seq);
2203 if (n) {
2204 while (*pos) {
2205 n = neigh_get_next(seq, n, pos);
2206 if (!n)
2207 break;
2210 return *pos ? NULL : n;
2213 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2215 struct neigh_seq_state *state = seq->private;
2216 struct neigh_table *tbl = state->tbl;
2217 struct pneigh_entry *pn = NULL;
2218 int bucket = state->bucket;
2220 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2221 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2222 pn = tbl->phash_buckets[bucket];
2223 if (pn)
2224 break;
2226 state->bucket = bucket;
2228 return pn;
2231 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2232 struct pneigh_entry *pn,
2233 loff_t *pos)
2235 struct neigh_seq_state *state = seq->private;
2236 struct neigh_table *tbl = state->tbl;
2238 pn = pn->next;
2239 while (!pn) {
2240 if (++state->bucket > PNEIGH_HASHMASK)
2241 break;
2242 pn = tbl->phash_buckets[state->bucket];
2243 if (pn)
2244 break;
2247 if (pn && pos)
2248 --(*pos);
2250 return pn;
2253 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2255 struct pneigh_entry *pn = pneigh_get_first(seq);
2257 if (pn) {
2258 while (*pos) {
2259 pn = pneigh_get_next(seq, pn, pos);
2260 if (!pn)
2261 break;
2264 return *pos ? NULL : pn;
2267 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2269 struct neigh_seq_state *state = seq->private;
2270 void *rc;
2272 rc = neigh_get_idx(seq, pos);
2273 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2274 rc = pneigh_get_idx(seq, pos);
2276 return rc;
2279 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2281 struct neigh_seq_state *state = seq->private;
2282 loff_t pos_minus_one;
2284 state->tbl = tbl;
2285 state->bucket = 0;
2286 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2288 read_lock_bh(&tbl->lock);
2290 pos_minus_one = *pos - 1;
2291 return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
2293 EXPORT_SYMBOL(neigh_seq_start);
2295 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2297 struct neigh_seq_state *state;
2298 void *rc;
2300 if (v == SEQ_START_TOKEN) {
2301 rc = neigh_get_idx(seq, pos);
2302 goto out;
2305 state = seq->private;
2306 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2307 rc = neigh_get_next(seq, v, NULL);
2308 if (rc)
2309 goto out;
2310 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2311 rc = pneigh_get_first(seq);
2312 } else {
2313 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2314 rc = pneigh_get_next(seq, v, NULL);
2316 out:
2317 ++(*pos);
2318 return rc;
2320 EXPORT_SYMBOL(neigh_seq_next);
2322 void neigh_seq_stop(struct seq_file *seq, void *v)
2324 struct neigh_seq_state *state = seq->private;
2325 struct neigh_table *tbl = state->tbl;
2327 read_unlock_bh(&tbl->lock);
2329 EXPORT_SYMBOL(neigh_seq_stop);
2331 /* statistics via seq_file */
2333 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2335 struct proc_dir_entry *pde = seq->private;
2336 struct neigh_table *tbl = pde->data;
2337 int cpu;
2339 if (*pos == 0)
2340 return SEQ_START_TOKEN;
2342 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
2343 if (!cpu_possible(cpu))
2344 continue;
2345 *pos = cpu+1;
2346 return per_cpu_ptr(tbl->stats, cpu);
2348 return NULL;
2351 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2353 struct proc_dir_entry *pde = seq->private;
2354 struct neigh_table *tbl = pde->data;
2355 int cpu;
2357 for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
2358 if (!cpu_possible(cpu))
2359 continue;
2360 *pos = cpu+1;
2361 return per_cpu_ptr(tbl->stats, cpu);
2363 return NULL;
/* Nothing to undo: the statistics iterator holds no locks or references. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2371 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2373 struct proc_dir_entry *pde = seq->private;
2374 struct neigh_table *tbl = pde->data;
2375 struct neigh_statistics *st = v;
2377 if (v == SEQ_START_TOKEN) {
2378 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
2379 return 0;
2382 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
2383 "%08lx %08lx %08lx %08lx\n",
2384 atomic_read(&tbl->entries),
2386 st->allocs,
2387 st->destroys,
2388 st->hash_grows,
2390 st->lookups,
2391 st->hits,
2393 st->res_failed,
2395 st->rcv_probes_mcast,
2396 st->rcv_probes_ucast,
2398 st->periodic_gc_runs,
2399 st->forced_gc_runs
2402 return 0;
2405 static const struct seq_operations neigh_stat_seq_ops = {
2406 .start = neigh_stat_seq_start,
2407 .next = neigh_stat_seq_next,
2408 .stop = neigh_stat_seq_stop,
2409 .show = neigh_stat_seq_show,
2412 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2414 int ret = seq_open(file, &neigh_stat_seq_ops);
2416 if (!ret) {
2417 struct seq_file *sf = file->private_data;
2418 sf->private = PDE(inode);
2420 return ret;
2423 static const struct file_operations neigh_stat_seq_fops = {
2424 .owner = THIS_MODULE,
2425 .open = neigh_stat_seq_open,
2426 .read = seq_read,
2427 .llseek = seq_lseek,
2428 .release = seq_release,
2431 #endif /* CONFIG_PROC_FS */
2433 static inline size_t neigh_nlmsg_size(void)
2435 return NLMSG_ALIGN(sizeof(struct ndmsg))
2436 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2437 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2438 + nla_total_size(sizeof(struct nda_cacheinfo))
2439 + nla_total_size(4); /* NDA_PROBES */
2442 static void __neigh_notify(struct neighbour *n, int type, int flags)
2444 struct sk_buff *skb;
2445 int err = -ENOBUFS;
2447 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2448 if (skb == NULL)
2449 goto errout;
2451 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2452 if (err < 0) {
2453 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2454 WARN_ON(err == -EMSGSIZE);
2455 kfree_skb(skb);
2456 goto errout;
2458 err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2459 errout:
2460 if (err < 0)
2461 rtnl_set_sk_err(RTNLGRP_NEIGH, err);
#ifdef CONFIG_ARPD
/*
 * Send an RTM_GETNEIGH request for @n to netlink listeners on the
 * neighbour group.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
#endif /* CONFIG_ARPD */
2471 #ifdef CONFIG_SYSCTL
/*
 * Template for the per-device / per-protocol neighbour sysctl tree.
 * neigh_sysctl_register() duplicates this object with kmemdup() and then
 * points neigh_vars[N].data at the matching neigh_parms field BY POSITION
 * (0 = mcast_probes, 1 = ucast_probes, 2 = app_probes, 3 = retrans_time,
 * ...), so the order of the neigh_vars entries below must not change.
 *
 * NOTE(review): several entries (retrans_time, anycast_delay, proxy_delay,
 * locktime, and the "neigh"/protocol directories) carry no .ctl_name in
 * this view - presumably deliberate, confirm before adding one.
 */
2473 static struct neigh_sysctl_table {
2474 struct ctl_table_header *sysctl_header;
2475 ctl_table neigh_vars[__NET_NEIGH_MAX];
2476 ctl_table neigh_dev[2];
2477 ctl_table neigh_neigh_dir[2];
2478 ctl_table neigh_proto_dir[2];
2479 ctl_table neigh_root_dir[2];
2480 } neigh_sysctl_template __read_mostly = {
/* Leaf entries: one tunable each, 0644, integer-sized. */
2481 .neigh_vars = {
2483 .ctl_name = NET_NEIGH_MCAST_SOLICIT,
2484 .procname = "mcast_solicit",
2485 .maxlen = sizeof(int),
2486 .mode = 0644,
2487 .proc_handler = &proc_dointvec,
2490 .ctl_name = NET_NEIGH_UCAST_SOLICIT,
2491 .procname = "ucast_solicit",
2492 .maxlen = sizeof(int),
2493 .mode = 0644,
2494 .proc_handler = &proc_dointvec,
2497 .ctl_name = NET_NEIGH_APP_SOLICIT,
2498 .procname = "app_solicit",
2499 .maxlen = sizeof(int),
2500 .mode = 0644,
2501 .proc_handler = &proc_dointvec,
2504 .procname = "retrans_time",
2505 .maxlen = sizeof(int),
2506 .mode = 0644,
2507 .proc_handler = &proc_dointvec_userhz_jiffies,
2510 .ctl_name = NET_NEIGH_REACHABLE_TIME,
2511 .procname = "base_reachable_time",
2512 .maxlen = sizeof(int),
2513 .mode = 0644,
2514 .proc_handler = &proc_dointvec_jiffies,
2515 .strategy = &sysctl_jiffies,
2518 .ctl_name = NET_NEIGH_DELAY_PROBE_TIME,
2519 .procname = "delay_first_probe_time",
2520 .maxlen = sizeof(int),
2521 .mode = 0644,
2522 .proc_handler = &proc_dointvec_jiffies,
2523 .strategy = &sysctl_jiffies,
2526 .ctl_name = NET_NEIGH_GC_STALE_TIME,
2527 .procname = "gc_stale_time",
2528 .maxlen = sizeof(int),
2529 .mode = 0644,
2530 .proc_handler = &proc_dointvec_jiffies,
2531 .strategy = &sysctl_jiffies,
2534 .ctl_name = NET_NEIGH_UNRES_QLEN,
2535 .procname = "unres_qlen",
2536 .maxlen = sizeof(int),
2537 .mode = 0644,
2538 .proc_handler = &proc_dointvec,
2541 .ctl_name = NET_NEIGH_PROXY_QLEN,
2542 .procname = "proxy_qlen",
2543 .maxlen = sizeof(int),
2544 .mode = 0644,
2545 .proc_handler = &proc_dointvec,
2548 .procname = "anycast_delay",
2549 .maxlen = sizeof(int),
2550 .mode = 0644,
2551 .proc_handler = &proc_dointvec_userhz_jiffies,
2554 .procname = "proxy_delay",
2555 .maxlen = sizeof(int),
2556 .mode = 0644,
2557 .proc_handler = &proc_dointvec_userhz_jiffies,
2560 .procname = "locktime",
2561 .maxlen = sizeof(int),
2562 .mode = 0644,
2563 .proc_handler = &proc_dointvec_userhz_jiffies,
2566 .ctl_name = NET_NEIGH_RETRANS_TIME_MS,
2567 .procname = "retrans_time_ms",
2568 .maxlen = sizeof(int),
2569 .mode = 0644,
2570 .proc_handler = &proc_dointvec_ms_jiffies,
2571 .strategy = &sysctl_ms_jiffies,
2574 .ctl_name = NET_NEIGH_REACHABLE_TIME_MS,
2575 .procname = "base_reachable_time_ms",
2576 .maxlen = sizeof(int),
2577 .mode = 0644,
2578 .proc_handler = &proc_dointvec_ms_jiffies,
2579 .strategy = &sysctl_ms_jiffies,
2582 .ctl_name = NET_NEIGH_GC_INTERVAL,
2583 .procname = "gc_interval",
2584 .maxlen = sizeof(int),
2585 .mode = 0644,
2586 .proc_handler = &proc_dointvec_jiffies,
2587 .strategy = &sysctl_jiffies,
2590 .ctl_name = NET_NEIGH_GC_THRESH1,
2591 .procname = "gc_thresh1",
2592 .maxlen = sizeof(int),
2593 .mode = 0644,
2594 .proc_handler = &proc_dointvec,
2597 .ctl_name = NET_NEIGH_GC_THRESH2,
2598 .procname = "gc_thresh2",
2599 .maxlen = sizeof(int),
2600 .mode = 0644,
2601 .proc_handler = &proc_dointvec,
2604 .ctl_name = NET_NEIGH_GC_THRESH3,
2605 .procname = "gc_thresh3",
2606 .maxlen = sizeof(int),
2607 .mode = 0644,
2608 .proc_handler = &proc_dointvec,
/* Directory levels (mode 0555); the template names the lowest one "default". */
2612 .neigh_dev = {
2614 .ctl_name = NET_PROTO_CONF_DEFAULT,
2615 .procname = "default",
2616 .mode = 0555,
2619 .neigh_neigh_dir = {
2621 .procname = "neigh",
2622 .mode = 0555,
/* Protocol directory: only the mode is preset in the template. */
2625 .neigh_proto_dir = {
2627 .mode = 0555,
2630 .neigh_root_dir = {
2632 .ctl_name = CTL_NET,
2633 .procname = "net",
2634 .mode = 0555,
2639 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2640 int p_id, int pdev_id, char *p_name,
2641 proc_handler *handler, ctl_handler *strategy)
2643 struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template,
2644 sizeof(*t), GFP_KERNEL);
2645 const char *dev_name_source = NULL;
2646 char *dev_name = NULL;
2647 int err = 0;
2649 if (!t)
2650 return -ENOBUFS;
2651 t->neigh_vars[0].data = &p->mcast_probes;
2652 t->neigh_vars[1].data = &p->ucast_probes;
2653 t->neigh_vars[2].data = &p->app_probes;
2654 t->neigh_vars[3].data = &p->retrans_time;
2655 t->neigh_vars[4].data = &p->base_reachable_time;
2656 t->neigh_vars[5].data = &p->delay_probe_time;
2657 t->neigh_vars[6].data = &p->gc_staletime;
2658 t->neigh_vars[7].data = &p->queue_len;
2659 t->neigh_vars[8].data = &p->proxy_qlen;
2660 t->neigh_vars[9].data = &p->anycast_delay;
2661 t->neigh_vars[10].data = &p->proxy_delay;
2662 t->neigh_vars[11].data = &p->locktime;
2663 t->neigh_vars[12].data = &p->retrans_time;
2664 t->neigh_vars[13].data = &p->base_reachable_time;
2666 if (dev) {
2667 dev_name_source = dev->name;
2668 t->neigh_dev[0].ctl_name = dev->ifindex;
2669 /* Terminate the table early */
2670 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2671 } else {
2672 dev_name_source = t->neigh_dev[0].procname;
2673 t->neigh_vars[14].data = (int *)(p + 1);
2674 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2675 t->neigh_vars[16].data = (int *)(p + 1) + 2;
2676 t->neigh_vars[17].data = (int *)(p + 1) + 3;
2680 if (handler || strategy) {
2681 /* RetransTime */
2682 t->neigh_vars[3].proc_handler = handler;
2683 t->neigh_vars[3].strategy = strategy;
2684 t->neigh_vars[3].extra1 = dev;
2685 if (!strategy)
2686 t->neigh_vars[3].ctl_name = CTL_UNNUMBERED;
2687 /* ReachableTime */
2688 t->neigh_vars[4].proc_handler = handler;
2689 t->neigh_vars[4].strategy = strategy;
2690 t->neigh_vars[4].extra1 = dev;
2691 if (!strategy)
2692 t->neigh_vars[4].ctl_name = CTL_UNNUMBERED;
2693 /* RetransTime (in milliseconds)*/
2694 t->neigh_vars[12].proc_handler = handler;
2695 t->neigh_vars[12].strategy = strategy;
2696 t->neigh_vars[12].extra1 = dev;
2697 if (!strategy)
2698 t->neigh_vars[12].ctl_name = CTL_UNNUMBERED;
2699 /* ReachableTime (in milliseconds) */
2700 t->neigh_vars[13].proc_handler = handler;
2701 t->neigh_vars[13].strategy = strategy;
2702 t->neigh_vars[13].extra1 = dev;
2703 if (!strategy)
2704 t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
2707 dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2708 if (!dev_name) {
2709 err = -ENOBUFS;
2710 goto free;
2713 t->neigh_dev[0].procname = dev_name;
2715 t->neigh_neigh_dir[0].ctl_name = pdev_id;
2717 t->neigh_proto_dir[0].procname = p_name;
2718 t->neigh_proto_dir[0].ctl_name = p_id;
2720 t->neigh_dev[0].child = t->neigh_vars;
2721 t->neigh_neigh_dir[0].child = t->neigh_dev;
2722 t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
2723 t->neigh_root_dir[0].child = t->neigh_proto_dir;
2725 t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
2726 if (!t->sysctl_header) {
2727 err = -ENOBUFS;
2728 goto free_procname;
2730 p->sysctl_table = t;
2731 return 0;
2733 /* error path */
2734 free_procname:
2735 kfree(dev_name);
2736 free:
2737 kfree(t);
2739 return err;
2742 void neigh_sysctl_unregister(struct neigh_parms *p)
2744 if (p->sysctl_table) {
2745 struct neigh_sysctl_table *t = p->sysctl_table;
2746 p->sysctl_table = NULL;
2747 unregister_sysctl_table(t->sysctl_header);
2748 kfree(t->neigh_dev[0].procname);
2749 kfree(t);
2753 #endif /* CONFIG_SYSCTL */
2755 static int __init neigh_init(void)
2757 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2758 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2759 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2761 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2762 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2764 return 0;
2767 subsys_initcall(neigh_init);
/*
 * Symbols exported from this file.  The unconditional list comes first;
 * exports that depend on a configuration option follow under their #ifdef.
 */
2769 EXPORT_SYMBOL(__neigh_event_send);
2770 EXPORT_SYMBOL(neigh_changeaddr);
2771 EXPORT_SYMBOL(neigh_compat_output);
2772 EXPORT_SYMBOL(neigh_connected_output);
2773 EXPORT_SYMBOL(neigh_create);
2774 EXPORT_SYMBOL(neigh_destroy);
2775 EXPORT_SYMBOL(neigh_event_ns);
2776 EXPORT_SYMBOL(neigh_ifdown);
2777 EXPORT_SYMBOL(neigh_lookup);
2778 EXPORT_SYMBOL(neigh_lookup_nodev);
2779 EXPORT_SYMBOL(neigh_parms_alloc);
2780 EXPORT_SYMBOL(neigh_parms_release);
2781 EXPORT_SYMBOL(neigh_rand_reach_time);
2782 EXPORT_SYMBOL(neigh_resolve_output);
2783 EXPORT_SYMBOL(neigh_table_clear);
2784 EXPORT_SYMBOL(neigh_table_init);
2785 EXPORT_SYMBOL(neigh_table_init_no_netlink);
2786 EXPORT_SYMBOL(neigh_update);
2787 EXPORT_SYMBOL(pneigh_enqueue);
2788 EXPORT_SYMBOL(pneigh_lookup);
/* Exported only when CONFIG_ARPD is enabled. */
2790 #ifdef CONFIG_ARPD
2791 EXPORT_SYMBOL(neigh_app_ns);
2792 #endif
/* The sysctl registration helpers are exported only when CONFIG_SYSCTL is enabled. */
2793 #ifdef CONFIG_SYSCTL
2794 EXPORT_SYMBOL(neigh_sysctl_register);
2795 EXPORT_SYMBOL(neigh_sysctl_unregister);
2796 #endif