/* Linux 5.1-rc1: net/core/neighbour.c */
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>
#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.
     (A minimal sketch of this pattern follows below.)

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   dev->hard_header is assumed to be simplistic and must not
   make callbacks back into the neighbour tables.
 */
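/* A minimal sketch of the "take a reference, then drop tbl->lock" rule
 * above (illustrative only, not a helper defined in this file):
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <find entry in its hash bucket>;
 *	if (n)
 *		neigh_hold(n);		// refcount keeps n alive after unlock
 *	write_unlock_bh(&tbl->lock);
 *	if (n) {
 *		<talk to the driver, transmit, etc.>
 *		neigh_release(n);
 *	}
 */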
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
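/* Worked example: with base = 30 * HZ the result is (base >> 1) + [0, base),
 * i.e. uniform over [15 * HZ, 45 * HZ). A base of 0 yields 0, which
 * effectively disables the randomization.
 */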
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}
static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
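/* Note on the logic above: forced GC shrinks the gc_list back towards
 * gc_thresh2 (max_clean), and only unreferenced entries (refcnt == 1) that
 * are NUD_FAILED or were last updated more than 5 seconds ago (tref) are
 * eligible for removal.
 */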
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
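/* The rehash above relies on tbl->hash() returning a full 32-bit value;
 * "hash >> (32 - shift)" keeps the top 'shift' bits, so a table with
 * hash_shift = 10 indexes 1024 buckets. Illustrative arithmetic:
 *
 *	hash = 0xdeadbeef;
 *	idx  = hash >> (32 - 10);	// 0x37a, i.e. bucket 890 of 1024
 */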
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
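/* Typical usage sketch (illustrative; this is the pattern protocol code
 * such as ARP uses, not a function defined here):
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &ip, dev);
 *	if (n) {
 *		... inspect n->ha, n->nud_state ...
 *		neigh_release(n);	// drop the ref neigh_lookup took
 *	}
 */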
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
static struct neighbour *___neigh_create(struct neigh_table *tbl,
					 const void *pkey,
					 struct net_device *dev,
					 bool exempt_from_gc, bool want_ref)
{
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
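/* Worked example: pneigh_hash() reads the last four key bytes and xor-folds
 * them down to the low four bits, so proxy entries spread over
 * PNEIGH_HASHMASK + 1 = 16 chains. For an IPv4 key (key_len == 4) the u32
 * read is the address itself, folded to a bucket index in [0, 15].
 */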
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	n->protocol = 0;
	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
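/* Timing recap for the worker above: it requeues itself every
 * BASE_REACHABLE_TIME / 2, and every 300 seconds re-randomizes each parms'
 * reachable_time via neigh_rand_reach_time(), keeping effective entry
 * lifetimes inside the documented (1/2 .. 3/2) * BASE_REACHABLE_TIME window.
 */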
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
		NEIGH_VAR(p, MCAST_PROBES));
}
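/* Example (assuming the common defaults of 3 unicast, 0 application and
 * 3 multicast probes): an NUD_INCOMPLETE entry gives up after
 * 3 + 0 + 3 = 6 unanswered solicitations. While in NUD_PROBE the multicast
 * term is MCAST_REPROBES instead, which commonly defaults to 0.
 */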
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold a reference count on the entry.
   (A minimal caller sketch follows below.)
 */
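/* A minimal caller sketch (illustrative; this mirrors the pattern the ARP
 * receive path uses when a reply confirms an address, not a call made in
 * this file):
 *
 *	neigh_update(n, lladdr, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE, 0);
 *
 * The caller already holds a reference on n, per the rule above.
 */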
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;
	err = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		goto out;
	}

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
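/* The read_seqbegin/read_seqretry loop above (repeated in
 * neigh_connected_output() below) snapshots neigh->ha without taking
 * neigh->lock: if neigh_update() rewrites the address under ha_lock while
 * the link-layer header is being built, the sequence count changes and the
 * header is rebuilt with a consistent address.
 */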
/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
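/* Sizing notes for the above: neigh_hash_alloc(3) starts every table with
 * 2^3 = 8 buckets (grown on demand in ___neigh_create()), and last_rand is
 * deliberately set 20 reachable_time intervals into the future, postponing
 * the first ReachableTime re-randomization in neigh_periodic_work().
 */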
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}
const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	if (protocol)
		neigh->protocol = protocol;

	neigh_release(neigh);

out:
	return err;
}
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2143 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2144 [NDTA_NAME] = { .type = NLA_STRING },
2145 [NDTA_THRESH1] = { .type = NLA_U32 },
2146 [NDTA_THRESH2] = { .type = NLA_U32 },
2147 [NDTA_THRESH3] = { .type = NLA_U32 },
2148 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2149 [NDTA_PARMS] = { .type = NLA_NESTED },
2152 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2153 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2154 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2155 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2156 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2157 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2158 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2159 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2160 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2161 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2162 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2163 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2164 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2165 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2166 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
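/*
 * A note on the NLA_U64 entries above: they carry millisecond values, and
 * neightbl_set() below reads them with nla_get_msecs().  That helper is
 * essentially the following conversion (a sketch of the
 * include/net/netlink.h inline, shown here for reference):
 *
 *	static inline unsigned long nla_get_msecs(const struct nlattr *nla)
 *	{
 *		u64 msecs = nla_get_u64(nla);
 *
 *		return msecs_to_jiffies((unsigned long) msecs);
 *	}
 */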
2169 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2170 struct netlink_ext_ack *extack)
2172 struct net *net = sock_net(skb->sk);
2173 struct neigh_table *tbl;
2174 struct ndtmsg *ndtmsg;
2175 struct nlattr *tb[NDTA_MAX+1];
2176 bool found = false;
2177 int err, tidx;
2179 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2180 nl_neightbl_policy, extack);
2181 if (err < 0)
2182 goto errout;
2184 if (tb[NDTA_NAME] == NULL) {
2185 err = -EINVAL;
2186 goto errout;
2189 ndtmsg = nlmsg_data(nlh);
2191 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2192 tbl = neigh_tables[tidx];
2193 if (!tbl)
2194 continue;
2195 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2196 continue;
2197 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2198 found = true;
2199 break;
2203 if (!found)
2204 return -ENOENT;
2206 /*
2207 * We acquire tbl->lock to be nice to the periodic timers and
2208 * make sure they always see a consistent set of values.
2209 */
2210 write_lock_bh(&tbl->lock);
2212 if (tb[NDTA_PARMS]) {
2213 struct nlattr *tbp[NDTPA_MAX+1];
2214 struct neigh_parms *p;
2215 int i, ifindex = 0;
2217 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2218 nl_ntbl_parm_policy, extack);
2219 if (err < 0)
2220 goto errout_tbl_lock;
2222 if (tbp[NDTPA_IFINDEX])
2223 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2225 p = lookup_neigh_parms(tbl, net, ifindex);
2226 if (p == NULL) {
2227 err = -ENOENT;
2228 goto errout_tbl_lock;
2231 for (i = 1; i <= NDTPA_MAX; i++) {
2232 if (tbp[i] == NULL)
2233 continue;
2235 switch (i) {
2236 case NDTPA_QUEUE_LEN:
2237 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2238 nla_get_u32(tbp[i]) *
2239 SKB_TRUESIZE(ETH_FRAME_LEN));
2240 break;
2241 case NDTPA_QUEUE_LENBYTES:
2242 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2243 nla_get_u32(tbp[i]));
2244 break;
2245 case NDTPA_PROXY_QLEN:
2246 NEIGH_VAR_SET(p, PROXY_QLEN,
2247 nla_get_u32(tbp[i]));
2248 break;
2249 case NDTPA_APP_PROBES:
2250 NEIGH_VAR_SET(p, APP_PROBES,
2251 nla_get_u32(tbp[i]));
2252 break;
2253 case NDTPA_UCAST_PROBES:
2254 NEIGH_VAR_SET(p, UCAST_PROBES,
2255 nla_get_u32(tbp[i]));
2256 break;
2257 case NDTPA_MCAST_PROBES:
2258 NEIGH_VAR_SET(p, MCAST_PROBES,
2259 nla_get_u32(tbp[i]));
2260 break;
2261 case NDTPA_MCAST_REPROBES:
2262 NEIGH_VAR_SET(p, MCAST_REPROBES,
2263 nla_get_u32(tbp[i]));
2264 break;
2265 case NDTPA_BASE_REACHABLE_TIME:
2266 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2267 nla_get_msecs(tbp[i]));
2268 /* update reachable_time as well; otherwise the change will
2269 * only be effective after the next time neigh_periodic_work
2270 * decides to recompute it (can be multiple minutes)
2271 */
2272 p->reachable_time =
2273 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2274 break;
2275 case NDTPA_GC_STALETIME:
2276 NEIGH_VAR_SET(p, GC_STALETIME,
2277 nla_get_msecs(tbp[i]));
2278 break;
2279 case NDTPA_DELAY_PROBE_TIME:
2280 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2281 nla_get_msecs(tbp[i]));
2282 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2283 break;
2284 case NDTPA_RETRANS_TIME:
2285 NEIGH_VAR_SET(p, RETRANS_TIME,
2286 nla_get_msecs(tbp[i]));
2287 break;
2288 case NDTPA_ANYCAST_DELAY:
2289 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2290 nla_get_msecs(tbp[i]));
2291 break;
2292 case NDTPA_PROXY_DELAY:
2293 NEIGH_VAR_SET(p, PROXY_DELAY,
2294 nla_get_msecs(tbp[i]));
2295 break;
2296 case NDTPA_LOCKTIME:
2297 NEIGH_VAR_SET(p, LOCKTIME,
2298 nla_get_msecs(tbp[i]));
2299 break;
2304 err = -ENOENT;
2305 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2306 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2307 !net_eq(net, &init_net))
2308 goto errout_tbl_lock;
2310 if (tb[NDTA_THRESH1])
2311 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2313 if (tb[NDTA_THRESH2])
2314 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2316 if (tb[NDTA_THRESH3])
2317 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2319 if (tb[NDTA_GC_INTERVAL])
2320 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2322 err = 0;
2324 errout_tbl_lock:
2325 write_unlock_bh(&tbl->lock);
2326 errout:
2327 return err;
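/*
 * A minimal sketch of the same update path from inside the kernel,
 * assuming a caller that already holds a table and parm set (the
 * "example_" names are illustrative only).  NEIGH_VAR_SET() also sets
 * the matching data_state bit, so a later change to the defaults will
 * not overwrite this per-device value (see neigh_copy_dflt_parms()).
 */
static void __maybe_unused example_set_app_probes(struct neigh_table *tbl,
						  struct neigh_parms *p)
{
	write_lock_bh(&tbl->lock);
	NEIGH_VAR_SET(p, APP_PROBES, 2);
	write_unlock_bh(&tbl->lock);
}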
2330 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2331 struct netlink_ext_ack *extack)
2333 struct ndtmsg *ndtm;
2335 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2336 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2337 return -EINVAL;
2340 ndtm = nlmsg_data(nlh);
2341 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2342 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2343 return -EINVAL;
2346 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2347 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2348 return -EINVAL;
2351 return 0;
2354 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2356 const struct nlmsghdr *nlh = cb->nlh;
2357 struct net *net = sock_net(skb->sk);
2358 int family, tidx, nidx = 0;
2359 int tbl_skip = cb->args[0];
2360 int neigh_skip = cb->args[1];
2361 struct neigh_table *tbl;
2363 if (cb->strict_check) {
2364 int err = neightbl_valid_dump_info(nlh, cb->extack);
2366 if (err < 0)
2367 return err;
2370 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2372 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2373 struct neigh_parms *p;
2375 tbl = neigh_tables[tidx];
2376 if (!tbl)
2377 continue;
2379 if (tidx < tbl_skip || (family && tbl->family != family))
2380 continue;
2382 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2383 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2384 NLM_F_MULTI) < 0)
2385 break;
2387 nidx = 0;
2388 p = list_next_entry(&tbl->parms, list);
2389 list_for_each_entry_from(p, &tbl->parms_list, list) {
2390 if (!net_eq(neigh_parms_net(p), net))
2391 continue;
2393 if (nidx < neigh_skip)
2394 goto next;
2396 if (neightbl_fill_param_info(skb, tbl, p,
2397 NETLINK_CB(cb->skb).portid,
2398 nlh->nlmsg_seq,
2399 RTM_NEWNEIGHTBL,
2400 NLM_F_MULTI) < 0)
2401 goto out;
2402 next:
2403 nidx++;
2406 neigh_skip = 0;
2408 out:
2409 cb->args[0] = tidx;
2410 cb->args[1] = nidx;
2412 return skb->len;
2415 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2416 u32 pid, u32 seq, int type, unsigned int flags)
2418 unsigned long now = jiffies;
2419 struct nda_cacheinfo ci;
2420 struct nlmsghdr *nlh;
2421 struct ndmsg *ndm;
2423 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2424 if (nlh == NULL)
2425 return -EMSGSIZE;
2427 ndm = nlmsg_data(nlh);
2428 ndm->ndm_family = neigh->ops->family;
2429 ndm->ndm_pad1 = 0;
2430 ndm->ndm_pad2 = 0;
2431 ndm->ndm_flags = neigh->flags;
2432 ndm->ndm_type = neigh->type;
2433 ndm->ndm_ifindex = neigh->dev->ifindex;
2435 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2436 goto nla_put_failure;
2438 read_lock_bh(&neigh->lock);
2439 ndm->ndm_state = neigh->nud_state;
2440 if (neigh->nud_state & NUD_VALID) {
2441 char haddr[MAX_ADDR_LEN];
2443 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2444 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2445 read_unlock_bh(&neigh->lock);
2446 goto nla_put_failure;
2450 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2451 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2452 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2453 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2454 read_unlock_bh(&neigh->lock);
2456 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2457 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2458 goto nla_put_failure;
2460 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2461 goto nla_put_failure;
2463 nlmsg_end(skb, nlh);
2464 return 0;
2466 nla_put_failure:
2467 nlmsg_cancel(skb, nlh);
2468 return -EMSGSIZE;
2471 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2472 u32 pid, u32 seq, int type, unsigned int flags,
2473 struct neigh_table *tbl)
2475 struct nlmsghdr *nlh;
2476 struct ndmsg *ndm;
2478 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2479 if (nlh == NULL)
2480 return -EMSGSIZE;
2482 ndm = nlmsg_data(nlh);
2483 ndm->ndm_family = tbl->family;
2484 ndm->ndm_pad1 = 0;
2485 ndm->ndm_pad2 = 0;
2486 ndm->ndm_flags = pn->flags | NTF_PROXY;
2487 ndm->ndm_type = RTN_UNICAST;
2488 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2489 ndm->ndm_state = NUD_NONE;
2491 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2492 goto nla_put_failure;
2494 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2495 goto nla_put_failure;
2497 nlmsg_end(skb, nlh);
2498 return 0;
2500 nla_put_failure:
2501 nlmsg_cancel(skb, nlh);
2502 return -EMSGSIZE;
2505 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2507 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2508 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2511 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2513 struct net_device *master;
2515 if (!master_idx)
2516 return false;
2518 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2519 if (!master || master->ifindex != master_idx)
2520 return true;
2522 return false;
2525 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2527 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2528 return true;
2530 return false;
2533 struct neigh_dump_filter {
2534 int master_idx;
2535 int dev_idx;
2538 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2539 struct netlink_callback *cb,
2540 struct neigh_dump_filter *filter)
2542 struct net *net = sock_net(skb->sk);
2543 struct neighbour *n;
2544 int rc, h, s_h = cb->args[1];
2545 int idx, s_idx = idx = cb->args[2];
2546 struct neigh_hash_table *nht;
2547 unsigned int flags = NLM_F_MULTI;
2549 if (filter->dev_idx || filter->master_idx)
2550 flags |= NLM_F_DUMP_FILTERED;
2552 rcu_read_lock_bh();
2553 nht = rcu_dereference_bh(tbl->nht);
2555 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2556 if (h > s_h)
2557 s_idx = 0;
2558 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2559 n != NULL;
2560 n = rcu_dereference_bh(n->next)) {
2561 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2562 goto next;
2563 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2564 neigh_master_filtered(n->dev, filter->master_idx))
2565 goto next;
2566 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2567 cb->nlh->nlmsg_seq,
2568 RTM_NEWNEIGH,
2569 flags) < 0) {
2570 rc = -1;
2571 goto out;
2573 next:
2574 idx++;
2577 rc = skb->len;
2578 out:
2579 rcu_read_unlock_bh();
2580 cb->args[1] = h;
2581 cb->args[2] = idx;
2582 return rc;
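/*
 * Dump resume cursor: neigh_dump_table() stores the hash bucket in
 * cb->args[1] and the index within that bucket in cb->args[2] when the
 * skb runs out of room; the next invocation of the dump callback skips
 * ahead to that position before emitting more entries.
 */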
2585 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2586 struct netlink_callback *cb,
2587 struct neigh_dump_filter *filter)
2589 struct pneigh_entry *n;
2590 struct net *net = sock_net(skb->sk);
2591 int rc, h, s_h = cb->args[3];
2592 int idx, s_idx = idx = cb->args[4];
2593 unsigned int flags = NLM_F_MULTI;
2595 if (filter->dev_idx || filter->master_idx)
2596 flags |= NLM_F_DUMP_FILTERED;
2598 read_lock_bh(&tbl->lock);
2600 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2601 if (h > s_h)
2602 s_idx = 0;
2603 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2604 if (idx < s_idx || pneigh_net(n) != net)
2605 goto next;
2606 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2607 neigh_master_filtered(n->dev, filter->master_idx))
2608 goto next;
2609 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2610 cb->nlh->nlmsg_seq,
2611 RTM_NEWNEIGH, flags, tbl) < 0) {
2612 read_unlock_bh(&tbl->lock);
2613 rc = -1;
2614 goto out;
2616 next:
2617 idx++;
2621 read_unlock_bh(&tbl->lock);
2622 rc = skb->len;
2623 out:
2624 cb->args[3] = h;
2625 cb->args[4] = idx;
2626 return rc;
2630 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2631 bool strict_check,
2632 struct neigh_dump_filter *filter,
2633 struct netlink_ext_ack *extack)
2635 struct nlattr *tb[NDA_MAX + 1];
2636 int err, i;
2638 if (strict_check) {
2639 struct ndmsg *ndm;
2641 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2642 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2643 return -EINVAL;
2646 ndm = nlmsg_data(nlh);
2647 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2648 ndm->ndm_state || ndm->ndm_type) {
2649 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2650 return -EINVAL;
2653 if (ndm->ndm_flags & ~NTF_PROXY) {
2654 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2655 return -EINVAL;
2658 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2659 nda_policy, extack);
2660 } else {
2661 err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2662 nda_policy, extack);
2664 if (err < 0)
2665 return err;
2667 for (i = 0; i <= NDA_MAX; ++i) {
2668 if (!tb[i])
2669 continue;
2671 /* all new attributes should require strict_check */
2672 switch (i) {
2673 case NDA_IFINDEX:
2674 filter->dev_idx = nla_get_u32(tb[i]);
2675 break;
2676 case NDA_MASTER:
2677 filter->master_idx = nla_get_u32(tb[i]);
2678 break;
2679 default:
2680 if (strict_check) {
2681 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2682 return -EINVAL;
2687 return 0;
2690 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2692 const struct nlmsghdr *nlh = cb->nlh;
2693 struct neigh_dump_filter filter = {};
2694 struct neigh_table *tbl;
2695 int t, family, s_t;
2696 int proxy = 0;
2697 int err;
2699 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2701 /* check for full ndmsg structure presence; the family member is
2702 * at the same offset in both structures
2703 */
2704 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2705 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2706 proxy = 1;
2708 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2709 if (err < 0 && cb->strict_check)
2710 return err;
2712 s_t = cb->args[0];
2714 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2715 tbl = neigh_tables[t];
2717 if (!tbl)
2718 continue;
2719 if (t < s_t || (family && tbl->family != family))
2720 continue;
2721 if (t > s_t)
2722 memset(&cb->args[1], 0, sizeof(cb->args) -
2723 sizeof(cb->args[0]));
2724 if (proxy)
2725 err = pneigh_dump_table(tbl, skb, cb, &filter);
2726 else
2727 err = neigh_dump_table(tbl, skb, cb, &filter);
2728 if (err < 0)
2729 break;
2732 cb->args[0] = t;
2733 return skb->len;
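/*
 * The userspace side of this dump, as a minimal sketch in plain C over a
 * NETLINK_ROUTE socket: only ndm_family is filled in, which is all
 * neigh_dump_info() requires.  Wrapped in #if 0 because it is userspace
 * code, not part of this kernel file; the "example_" name is illustrative.
 */
#if 0
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/neighbour.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* returns a socket ready for recv()ing NLM_F_MULTI parts, or -1 */
static int example_request_neigh_dump(int family)
{
	struct {
		struct nlmsghdr nlh;
		struct ndmsg ndm;
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
	req.nlh.nlmsg_type = RTM_GETNEIGH;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ndm.ndm_family = family;	/* e.g. AF_INET */

	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
		close(fd);
		return -1;
	}
	return fd;	/* read until a NLMSG_DONE message arrives */
}
#endif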
2736 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2737 struct neigh_table **tbl,
2738 void **dst, int *dev_idx, u8 *ndm_flags,
2739 struct netlink_ext_ack *extack)
2741 struct nlattr *tb[NDA_MAX + 1];
2742 struct ndmsg *ndm;
2743 int err, i;
2745 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2746 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2747 return -EINVAL;
2750 ndm = nlmsg_data(nlh);
2751 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2752 ndm->ndm_type) {
2753 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2754 return -EINVAL;
2757 if (ndm->ndm_flags & ~NTF_PROXY) {
2758 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2759 return -EINVAL;
2762 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2763 nda_policy, extack);
2764 if (err < 0)
2765 return err;
2767 *ndm_flags = ndm->ndm_flags;
2768 *dev_idx = ndm->ndm_ifindex;
2769 *tbl = neigh_find_table(ndm->ndm_family);
2770 if (*tbl == NULL) {
2771 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2772 return -EAFNOSUPPORT;
2775 for (i = 0; i <= NDA_MAX; ++i) {
2776 if (!tb[i])
2777 continue;
2779 switch (i) {
2780 case NDA_DST:
2781 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2782 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2783 return -EINVAL;
2785 *dst = nla_data(tb[i]);
2786 break;
2787 default:
2788 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2789 return -EINVAL;
2793 return 0;
2796 static inline size_t neigh_nlmsg_size(void)
2798 return NLMSG_ALIGN(sizeof(struct ndmsg))
2799 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2800 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2801 + nla_total_size(sizeof(struct nda_cacheinfo))
2802 + nla_total_size(4) /* NDA_PROBES */
2803 + nla_total_size(1); /* NDA_PROTOCOL */
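/*
 * A worked instance of the sum above, assuming the usual MAX_ADDR_LEN
 * of 32 and the 16-byte struct nda_cacheinfo:
 *
 *	NLMSG_ALIGN(sizeof(struct ndmsg))	= 12
 *	nla_total_size(32)	NDA_DST		= 36
 *	nla_total_size(32)	NDA_LLADDR	= 36
 *	nla_total_size(16)	NDA_CACHEINFO	= 20
 *	nla_total_size(4)	NDA_PROBES	=  8
 *	nla_total_size(1)	NDA_PROTOCOL	=  8
 *						 ---
 *	worst-case message size			 120 bytes
 */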
2806 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2807 u32 pid, u32 seq)
2809 struct sk_buff *skb;
2810 int err = 0;
2812 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2813 if (!skb)
2814 return -ENOBUFS;
2816 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2817 if (err) {
2818 kfree_skb(skb);
2819 goto errout;
2822 err = rtnl_unicast(skb, net, pid);
2823 errout:
2824 return err;
2827 static inline size_t pneigh_nlmsg_size(void)
2829 return NLMSG_ALIGN(sizeof(struct ndmsg))
2830 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2831 + nla_total_size(1); /* NDA_PROTOCOL */
2834 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2835 u32 pid, u32 seq, struct neigh_table *tbl)
2837 struct sk_buff *skb;
2838 int err = 0;
2840 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2841 if (!skb)
2842 return -ENOBUFS;
2844 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2845 if (err) {
2846 kfree_skb(skb);
2847 goto errout;
2850 err = rtnl_unicast(skb, net, pid);
2851 errout:
2852 return err;
2855 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2856 struct netlink_ext_ack *extack)
2858 struct net *net = sock_net(in_skb->sk);
2859 struct net_device *dev = NULL;
2860 struct neigh_table *tbl = NULL;
2861 struct neighbour *neigh;
2862 void *dst = NULL;
2863 u8 ndm_flags = 0;
2864 int dev_idx = 0;
2865 int err;
2867 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2868 extack);
2869 if (err < 0)
2870 return err;
2872 if (dev_idx) {
2873 dev = __dev_get_by_index(net, dev_idx);
2874 if (!dev) {
2875 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2876 return -ENODEV;
2880 if (!dst) {
2881 NL_SET_ERR_MSG(extack, "Network address not specified");
2882 return -EINVAL;
2885 if (ndm_flags & NTF_PROXY) {
2886 struct pneigh_entry *pn;
2888 pn = pneigh_lookup(tbl, net, dst, dev, 0);
2889 if (!pn) {
2890 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
2891 return -ENOENT;
2893 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
2894 nlh->nlmsg_seq, tbl);
2897 if (!dev) {
2898 NL_SET_ERR_MSG(extack, "No device specified");
2899 return -EINVAL;
2902 neigh = neigh_lookup(tbl, dst, dev);
2903 if (!neigh) {
2904 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
2905 return -ENOENT;
2908 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
2909 nlh->nlmsg_seq);
2911 neigh_release(neigh);
2913 return err;
2916 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2918 int chain;
2919 struct neigh_hash_table *nht;
2921 rcu_read_lock_bh();
2922 nht = rcu_dereference_bh(tbl->nht);
2924 read_lock(&tbl->lock); /* avoid resizes */
2925 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2926 struct neighbour *n;
2928 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2929 n != NULL;
2930 n = rcu_dereference_bh(n->next))
2931 cb(n, cookie);
2933 read_unlock(&tbl->lock);
2934 rcu_read_unlock_bh();
2936 EXPORT_SYMBOL(neigh_for_each);
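/*
 * A minimal sketch of a neigh_for_each() caller: counting the entries
 * that sit on one device.  The callback runs under tbl->lock with BH
 * disabled, so it must stay short and must not sleep.  The "example_"
 * names are illustrative; arp_tbl (from <net/arp.h>) is assumed as the
 * table being walked.
 */
struct example_count_state {
	struct net_device *dev;
	int count;
};

static void example_count_cb(struct neighbour *n, void *cookie)
{
	struct example_count_state *cs = cookie;

	if (n->dev == cs->dev)
		cs->count++;
}

/* usage:
 *	struct example_count_state cs = { .dev = dev };
 *
 *	neigh_for_each(&arp_tbl, example_count_cb, &cs);
 */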
2938 /* The tbl->lock must be held as a writer and BH disabled. */
2939 void __neigh_for_each_release(struct neigh_table *tbl,
2940 int (*cb)(struct neighbour *))
2942 int chain;
2943 struct neigh_hash_table *nht;
2945 nht = rcu_dereference_protected(tbl->nht,
2946 lockdep_is_held(&tbl->lock));
2947 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2948 struct neighbour *n;
2949 struct neighbour __rcu **np;
2951 np = &nht->hash_buckets[chain];
2952 while ((n = rcu_dereference_protected(*np,
2953 lockdep_is_held(&tbl->lock))) != NULL) {
2954 int release;
2956 write_lock(&n->lock);
2957 release = cb(n);
2958 if (release) {
2959 rcu_assign_pointer(*np,
2960 rcu_dereference_protected(n->next,
2961 lockdep_is_held(&tbl->lock)));
2962 neigh_mark_dead(n);
2963 } else
2964 np = &n->next;
2965 write_unlock(&n->lock);
2966 if (release)
2967 neigh_cleanup_and_release(n);
2971 EXPORT_SYMBOL(__neigh_for_each_release);
2973 int neigh_xmit(int index, struct net_device *dev,
2974 const void *addr, struct sk_buff *skb)
2976 int err = -EAFNOSUPPORT;
2977 if (likely(index < NEIGH_NR_TABLES)) {
2978 struct neigh_table *tbl;
2979 struct neighbour *neigh;
2981 tbl = neigh_tables[index];
2982 if (!tbl)
2983 goto out;
2984 rcu_read_lock_bh();
2985 neigh = __neigh_lookup_noref(tbl, addr, dev);
2986 if (!neigh)
2987 neigh = __neigh_create(tbl, addr, dev, false);
2988 err = PTR_ERR(neigh);
2989 if (IS_ERR(neigh)) {
2990 rcu_read_unlock_bh();
2991 goto out_kfree_skb;
2993 err = neigh->output(neigh, skb);
2994 rcu_read_unlock_bh();
2996 else if (index == NEIGH_LINK_TABLE) {
2997 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2998 addr, NULL, skb->len);
2999 if (err < 0)
3000 goto out_kfree_skb;
3001 err = dev_queue_xmit(skb);
3003 out:
3004 return err;
3005 out_kfree_skb:
3006 kfree_skb(skb);
3007 goto out;
3009 EXPORT_SYMBOL(neigh_xmit);
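/*
 * A minimal sketch of handing a frame to neigh_xmit(), as an
 * encapsulating caller might (the "example_" name is illustrative).
 * NEIGH_ARP_TABLE selects IPv4 resolution; next_hop points at the
 * nexthop address in network byte order.  On failure the skb has
 * already been consumed by neigh_xmit().
 */
static int __maybe_unused example_xmit_ipv4(struct sk_buff *skb,
					    struct net_device *dev,
					    const __be32 *next_hop)
{
	return neigh_xmit(NEIGH_ARP_TABLE, dev, next_hop, skb);
}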
3011 #ifdef CONFIG_PROC_FS
3013 static struct neighbour *neigh_get_first(struct seq_file *seq)
3015 struct neigh_seq_state *state = seq->private;
3016 struct net *net = seq_file_net(seq);
3017 struct neigh_hash_table *nht = state->nht;
3018 struct neighbour *n = NULL;
3019 int bucket = state->bucket;
3021 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3022 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3023 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3025 while (n) {
3026 if (!net_eq(dev_net(n->dev), net))
3027 goto next;
3028 if (state->neigh_sub_iter) {
3029 loff_t fakep = 0;
3030 void *v;
3032 v = state->neigh_sub_iter(state, n, &fakep);
3033 if (!v)
3034 goto next;
3036 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3037 break;
3038 if (n->nud_state & ~NUD_NOARP)
3039 break;
3040 next:
3041 n = rcu_dereference_bh(n->next);
3044 if (n)
3045 break;
3047 state->bucket = bucket;
3049 return n;
3052 static struct neighbour *neigh_get_next(struct seq_file *seq,
3053 struct neighbour *n,
3054 loff_t *pos)
3056 struct neigh_seq_state *state = seq->private;
3057 struct net *net = seq_file_net(seq);
3058 struct neigh_hash_table *nht = state->nht;
3060 if (state->neigh_sub_iter) {
3061 void *v = state->neigh_sub_iter(state, n, pos);
3062 if (v)
3063 return n;
3065 n = rcu_dereference_bh(n->next);
3067 while (1) {
3068 while (n) {
3069 if (!net_eq(dev_net(n->dev), net))
3070 goto next;
3071 if (state->neigh_sub_iter) {
3072 void *v = state->neigh_sub_iter(state, n, pos);
3073 if (v)
3074 return n;
3075 goto next;
3077 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3078 break;
3080 if (n->nud_state & ~NUD_NOARP)
3081 break;
3082 next:
3083 n = rcu_dereference_bh(n->next);
3086 if (n)
3087 break;
3089 if (++state->bucket >= (1 << nht->hash_shift))
3090 break;
3092 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
3095 if (n && pos)
3096 --(*pos);
3097 return n;
3100 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3102 struct neighbour *n = neigh_get_first(seq);
3104 if (n) {
3105 --(*pos);
3106 while (*pos) {
3107 n = neigh_get_next(seq, n, pos);
3108 if (!n)
3109 break;
3112 return *pos ? NULL : n;
3115 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3117 struct neigh_seq_state *state = seq->private;
3118 struct net *net = seq_file_net(seq);
3119 struct neigh_table *tbl = state->tbl;
3120 struct pneigh_entry *pn = NULL;
3121 int bucket = state->bucket;
3123 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3124 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3125 pn = tbl->phash_buckets[bucket];
3126 while (pn && !net_eq(pneigh_net(pn), net))
3127 pn = pn->next;
3128 if (pn)
3129 break;
3131 state->bucket = bucket;
3133 return pn;
3136 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3137 struct pneigh_entry *pn,
3138 loff_t *pos)
3140 struct neigh_seq_state *state = seq->private;
3141 struct net *net = seq_file_net(seq);
3142 struct neigh_table *tbl = state->tbl;
3144 do {
3145 pn = pn->next;
3146 } while (pn && !net_eq(pneigh_net(pn), net));
3148 while (!pn) {
3149 if (++state->bucket > PNEIGH_HASHMASK)
3150 break;
3151 pn = tbl->phash_buckets[state->bucket];
3152 while (pn && !net_eq(pneigh_net(pn), net))
3153 pn = pn->next;
3154 if (pn)
3155 break;
3158 if (pn && pos)
3159 --(*pos);
3161 return pn;
3164 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3166 struct pneigh_entry *pn = pneigh_get_first(seq);
3168 if (pn) {
3169 --(*pos);
3170 while (*pos) {
3171 pn = pneigh_get_next(seq, pn, pos);
3172 if (!pn)
3173 break;
3176 return *pos ? NULL : pn;
3179 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3181 struct neigh_seq_state *state = seq->private;
3182 void *rc;
3183 loff_t idxpos = *pos;
3185 rc = neigh_get_idx(seq, &idxpos);
3186 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3187 rc = pneigh_get_idx(seq, &idxpos);
3189 return rc;
3192 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3193 __acquires(rcu_bh)
3195 struct neigh_seq_state *state = seq->private;
3197 state->tbl = tbl;
3198 state->bucket = 0;
3199 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3201 rcu_read_lock_bh();
3202 state->nht = rcu_dereference_bh(tbl->nht);
3204 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3206 EXPORT_SYMBOL(neigh_seq_start);
3208 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3210 struct neigh_seq_state *state;
3211 void *rc;
3213 if (v == SEQ_START_TOKEN) {
3214 rc = neigh_get_first(seq);
3215 goto out;
3218 state = seq->private;
3219 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3220 rc = neigh_get_next(seq, v, NULL);
3221 if (rc)
3222 goto out;
3223 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3224 rc = pneigh_get_first(seq);
3225 } else {
3226 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3227 rc = pneigh_get_next(seq, v, NULL);
3229 out:
3230 ++(*pos);
3231 return rc;
3233 EXPORT_SYMBOL(neigh_seq_next);
3235 void neigh_seq_stop(struct seq_file *seq, void *v)
3236 __releases(rcu_bh)
3238 rcu_read_unlock_bh();
3240 EXPORT_SYMBOL(neigh_seq_stop);
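/*
 * A minimal sketch of wiring these iterators into a protocol's own
 * seq_file, in the style of net/ipv4/arp.c.  The show callback is a
 * placeholder, the "example_" names are illustrative, and arp_tbl (from
 * <net/arp.h>) is assumed as the table being shown.
 */
static void *example_neigh_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* NEIGH_SEQ_SKIP_NOARP elides NUD_NOARP entries from the walk */
	return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
}

static int example_neigh_seq_show(struct seq_file *seq, void *v)
{
	if (v != SEQ_START_TOKEN)
		seq_printf(seq, "dev %s\n",
			   ((struct neighbour *)v)->dev->name);
	return 0;
}

static const struct seq_operations example_neigh_seq_ops = {
	.start	= example_neigh_seq_start,
	.next	= neigh_seq_next,
	.stop	= neigh_seq_stop,
	.show	= example_neigh_seq_show,
};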
3242 /* statistics via seq_file */
3244 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3246 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3247 int cpu;
3249 if (*pos == 0)
3250 return SEQ_START_TOKEN;
3252 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3253 if (!cpu_possible(cpu))
3254 continue;
3255 *pos = cpu+1;
3256 return per_cpu_ptr(tbl->stats, cpu);
3258 return NULL;
3261 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3263 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3264 int cpu;
3266 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3267 if (!cpu_possible(cpu))
3268 continue;
3269 *pos = cpu+1;
3270 return per_cpu_ptr(tbl->stats, cpu);
3272 return NULL;
3275 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3280 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3282 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3283 struct neigh_statistics *st = v;
3285 if (v == SEQ_START_TOKEN) {
3286 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3287 return 0;
3290 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3291 "%08lx %08lx %08lx %08lx %08lx %08lx\n",
3292 atomic_read(&tbl->entries),
3294 st->allocs,
3295 st->destroys,
3296 st->hash_grows,
3298 st->lookups,
3299 st->hits,
3301 st->res_failed,
3303 st->rcv_probes_mcast,
3304 st->rcv_probes_ucast,
3306 st->periodic_gc_runs,
3307 st->forced_gc_runs,
3308 st->unres_discards,
3309 st->table_fulls
3312 return 0;
3315 static const struct seq_operations neigh_stat_seq_ops = {
3316 .start = neigh_stat_seq_start,
3317 .next = neigh_stat_seq_next,
3318 .stop = neigh_stat_seq_stop,
3319 .show = neigh_stat_seq_show,
3321 #endif /* CONFIG_PROC_FS */
3323 static void __neigh_notify(struct neighbour *n, int type, int flags,
3324 u32 pid)
3326 struct net *net = dev_net(n->dev);
3327 struct sk_buff *skb;
3328 int err = -ENOBUFS;
3330 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3331 if (skb == NULL)
3332 goto errout;
3334 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3335 if (err < 0) {
3336 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3337 WARN_ON(err == -EMSGSIZE);
3338 kfree_skb(skb);
3339 goto errout;
3341 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3342 return;
3343 errout:
3344 if (err < 0)
3345 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3348 void neigh_app_ns(struct neighbour *n)
3350 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3352 EXPORT_SYMBOL(neigh_app_ns);
3354 #ifdef CONFIG_SYSCTL
3355 static int zero;
3356 static int int_max = INT_MAX;
3357 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3359 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3360 void __user *buffer, size_t *lenp, loff_t *ppos)
3362 int size, ret;
3363 struct ctl_table tmp = *ctl;
3365 tmp.extra1 = &zero;
3366 tmp.extra2 = &unres_qlen_max;
3367 tmp.data = &size;
3369 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3370 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3372 if (write && !ret)
3373 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3374 return ret;
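/*
 * The legacy "unres_qlen" knob counts packets, while the stored limit
 * (QUEUE_LEN_BYTES) counts bytes of skb truesize.  proc_unres_qlen()
 * converts on the fly: a read divides the byte limit by
 * SKB_TRUESIZE(ETH_FRAME_LEN) (one worst-case Ethernet frame plus skb
 * overhead) and a write multiplies the packet count back up, so the
 * value round-trips; a byte limit smaller than one frame reads back
 * as 0 packets.
 */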
3377 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3378 int family)
3380 switch (family) {
3381 case AF_INET:
3382 return __in_dev_arp_parms_get_rcu(dev);
3383 case AF_INET6:
3384 return __in6_dev_nd_parms_get_rcu(dev);
3386 return NULL;
3389 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3390 int index)
3392 struct net_device *dev;
3393 int family = neigh_parms_family(p);
3395 rcu_read_lock();
3396 for_each_netdev_rcu(net, dev) {
3397 struct neigh_parms *dst_p =
3398 neigh_get_dev_parms_rcu(dev, family);
3400 if (dst_p && !test_bit(index, dst_p->data_state))
3401 dst_p->data[index] = p->data[index];
3403 rcu_read_unlock();
3406 static void neigh_proc_update(struct ctl_table *ctl, int write)
3408 struct net_device *dev = ctl->extra1;
3409 struct neigh_parms *p = ctl->extra2;
3410 struct net *net = neigh_parms_net(p);
3411 int index = (int *) ctl->data - p->data;
3413 if (!write)
3414 return;
3416 set_bit(index, p->data_state);
3417 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3418 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3419 if (!dev) /* NULL dev means this is default value */
3420 neigh_copy_dflt_parms(net, p, index);
3423 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3424 void __user *buffer,
3425 size_t *lenp, loff_t *ppos)
3427 struct ctl_table tmp = *ctl;
3428 int ret;
3430 tmp.extra1 = &zero;
3431 tmp.extra2 = &int_max;
3433 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3434 neigh_proc_update(ctl, write);
3435 return ret;
3438 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3439 void __user *buffer, size_t *lenp, loff_t *ppos)
3441 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3443 neigh_proc_update(ctl, write);
3444 return ret;
3446 EXPORT_SYMBOL(neigh_proc_dointvec);
3448 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3449 void __user *buffer,
3450 size_t *lenp, loff_t *ppos)
3452 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3454 neigh_proc_update(ctl, write);
3455 return ret;
3457 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3459 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3460 void __user *buffer,
3461 size_t *lenp, loff_t *ppos)
3463 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3465 neigh_proc_update(ctl, write);
3466 return ret;
3469 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3470 void __user *buffer,
3471 size_t *lenp, loff_t *ppos)
3473 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3475 neigh_proc_update(ctl, write);
3476 return ret;
3478 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3480 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3481 void __user *buffer,
3482 size_t *lenp, loff_t *ppos)
3484 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3486 neigh_proc_update(ctl, write);
3487 return ret;
3490 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3491 void __user *buffer,
3492 size_t *lenp, loff_t *ppos)
3494 struct neigh_parms *p = ctl->extra2;
3495 int ret;
3497 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3498 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3499 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3500 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3501 else
3502 ret = -1;
3504 if (write && ret == 0) {
3505 /* update reachable_time as well; otherwise the change will
3506 * only be effective after the next time neigh_periodic_work
3507 * decides to recompute it
3508 */
3509 p->reachable_time =
3510 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3512 return ret;
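/*
 * With this handler in place, a write such as
 *
 *	echo 30000 > /proc/sys/net/ipv4/neigh/eth0/base_reachable_time_ms
 *
 * (eth0 standing in for any device) takes effect immediately instead of
 * waiting for neigh_periodic_work to re-randomize reachable_time.
 */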
3515 #define NEIGH_PARMS_DATA_OFFSET(index) \
3516 (&((struct neigh_parms *) 0)->data[index])
3518 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3519 [NEIGH_VAR_ ## attr] = { \
3520 .procname = name, \
3521 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3522 .maxlen = sizeof(int), \
3523 .mode = mval, \
3524 .proc_handler = proc, \
3527 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3528 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3530 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3531 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3533 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3534 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3536 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3537 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3539 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3540 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3542 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3543 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3545 static struct neigh_sysctl_table {
3546 struct ctl_table_header *sysctl_header;
3547 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3548 } neigh_sysctl_template __read_mostly = {
3549 .neigh_vars = {
3550 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3551 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3552 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3553 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3554 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3555 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3556 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3557 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3558 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3559 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3560 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3561 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3562 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3563 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3564 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3565 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3566 [NEIGH_VAR_GC_INTERVAL] = {
3567 .procname = "gc_interval",
3568 .maxlen = sizeof(int),
3569 .mode = 0644,
3570 .proc_handler = proc_dointvec_jiffies,
3572 [NEIGH_VAR_GC_THRESH1] = {
3573 .procname = "gc_thresh1",
3574 .maxlen = sizeof(int),
3575 .mode = 0644,
3576 .extra1 = &zero,
3577 .extra2 = &int_max,
3578 .proc_handler = proc_dointvec_minmax,
3580 [NEIGH_VAR_GC_THRESH2] = {
3581 .procname = "gc_thresh2",
3582 .maxlen = sizeof(int),
3583 .mode = 0644,
3584 .extra1 = &zero,
3585 .extra2 = &int_max,
3586 .proc_handler = proc_dointvec_minmax,
3588 [NEIGH_VAR_GC_THRESH3] = {
3589 .procname = "gc_thresh3",
3590 .maxlen = sizeof(int),
3591 .mode = 0644,
3592 .extra1 = &zero,
3593 .extra2 = &int_max,
3594 .proc_handler = proc_dointvec_minmax,
3600 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3601 proc_handler *handler)
3603 int i;
3604 struct neigh_sysctl_table *t;
3605 const char *dev_name_source;
3606 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3607 char *p_name;
3609 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3610 if (!t)
3611 goto err;
3613 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3614 t->neigh_vars[i].data += (long) p;
3615 t->neigh_vars[i].extra1 = dev;
3616 t->neigh_vars[i].extra2 = p;
3619 if (dev) {
3620 dev_name_source = dev->name;
3621 /* Terminate the table early */
3622 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3623 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3624 } else {
3625 struct neigh_table *tbl = p->tbl;
3626 dev_name_source = "default";
3627 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3628 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3629 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3630 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3633 if (handler) {
3634 /* RetransTime */
3635 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3636 /* ReachableTime */
3637 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3638 /* RetransTime (in milliseconds) */
3639 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3640 /* ReachableTime (in milliseconds) */
3641 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3642 } else {
3643 /* Those handlers will update p->reachable_time after
3644 * base_reachable_time(_ms) is set, to ensure the new timer starts being
3645 * applied after the next neighbour update instead of waiting for
3646 * neigh_periodic_work to update its value (which can take multiple minutes).
3647 * Any handler that replaces them should do this as well.
3648 */
3649 /* ReachableTime */
3650 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3651 neigh_proc_base_reachable_time;
3652 /* ReachableTime (in milliseconds) */
3653 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3654 neigh_proc_base_reachable_time;
3657 /* Don't export sysctls to unprivileged users */
3658 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3659 t->neigh_vars[0].procname = NULL;
3661 switch (neigh_parms_family(p)) {
3662 case AF_INET:
3663 p_name = "ipv4";
3664 break;
3665 case AF_INET6:
3666 p_name = "ipv6";
3667 break;
3668 default:
3669 BUG();
3672 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3673 p_name, dev_name_source);
3674 t->sysctl_header =
3675 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3676 if (!t->sysctl_header)
3677 goto free;
3679 p->sysctl_table = t;
3680 return 0;
3682 free:
3683 kfree(t);
3684 err:
3685 return -ENOBUFS;
3687 EXPORT_SYMBOL(neigh_sysctl_register);
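/*
 * A minimal sketch of registering the table-wide defaults, in the style
 * of the ARP init code: a NULL dev selects the ".../neigh/default"
 * directory and keeps the gc_* entries live.  The "example_" name is
 * illustrative; arp_tbl (from <net/arp.h>) is assumed.
 */
static int __init example_register_neigh_sysctls(void)
{
	return neigh_sysctl_register(NULL, &arp_tbl.parms, NULL);
}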
3689 void neigh_sysctl_unregister(struct neigh_parms *p)
3691 if (p->sysctl_table) {
3692 struct neigh_sysctl_table *t = p->sysctl_table;
3693 p->sysctl_table = NULL;
3694 unregister_net_sysctl_table(t->sysctl_header);
3695 kfree(t);
3698 EXPORT_SYMBOL(neigh_sysctl_unregister);
3700 #endif /* CONFIG_SYSCTL */
3702 static int __init neigh_init(void)
3704 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3705 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3706 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3708 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3710 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3712 return 0;
3715 subsys_initcall(neigh_init);