/* net/core/neighbour.c */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
{
	int i;

	switch (family) {
	default:
		DEBUG_NET_WARN_ON_ONCE(1);
		fallthrough; /* to avoid panic by null-ptr-deref */
	case AF_INET:
		i = NEIGH_ARP_TABLE;
		break;
	case AF_INET6:
		i = NEIGH_ND_TABLE;
		break;
	}

	return &dev->neighbours[i];
}

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect another entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be made under neigh->lock,
   the most complicated procedure, which we allow is dev->hard_header.
   It is supposed, that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
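
/* A minimal sketch of the pattern these rules imply (illustrative only,
 * not a helper that exists in this file): find the entry under tbl->lock,
 * take a reference, drop the lock, and only then do the heavy work:
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <scan hash bucket>;
 *	neigh_hold(n);
 *	write_unlock_bh(&tbl->lock);
 *	<resolve / transmit / call into the driver>;
 *	neigh_release(n);
 */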
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? get_random_u32_below(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
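
/* Example: with base = 30 * HZ (the typical IPv6 BaseReachableTime of 30s),
 * the result is uniformly distributed in [15 * HZ, 45 * HZ).
 */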
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
	if (!list_empty(&n->managed_list))
		list_del_init(&n->managed_list);
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_managed_list(struct neighbour *n)
{
	bool on_managed_list, add_to_managed;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	add_to_managed = n->flags & NTF_MANAGED;
	on_managed_list = !list_empty(&n->managed_list);

	if (!add_to_managed && on_managed_list)
		list_del_init(&n->managed_list);
	else if (add_to_managed && !on_managed_list)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
			       bool *gc_update, bool *managed_update)
{
	u32 ndm_flags, old_flags = neigh->flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return;

	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;

	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		*notify = 1;
		*gc_update = true;
	}
	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
		if (ndm_flags & NTF_MANAGED)
			neigh->flags |= NTF_MANAGED;
		else
			neigh->flags &= ~NTF_MANAGED;
		*notify = 1;
		*managed_update = true;
	}
}

bool neigh_remove_one(struct neighbour *n)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		hlist_del_rcu(&n->hash);
		hlist_del_rcu(&n->dev_list);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) -
			READ_ONCE(tbl->gc_thresh2);
	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;
	int loop = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    !time_in_range(n->updated, tref, jiffies))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n))
				shrunk++;
			if (shrunk >= max_clean)
				break;
			if (++loop == 16) {
				if (ktime_get_ns() > tmax)
					goto unlock;
				loop = 0;
			}
		}
	}

	WRITE_ONCE(tbl->last_flush, jiffies);
unlock:
	write_unlock_bh(&tbl->lock);

	return shrunk;
}
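
/* Note: forced GC is deliberately bounded: it only touches entries with no
 * other references, skips anything updated within the last 5 seconds (tref),
 * and gives up after roughly 1ms of wall time (tmax), rechecked every 16
 * entries.
 */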
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	/* Use safe distance from the jiffies - LONG_MAX point while timer
	 * is running in DELAY/PROBE state but still show to user space
	 * large times in the past.
	 */
	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);

	neigh_hold(n);
	if (!time_in_range(n->confirmed, mint, jiffies))
		n->confirmed = mint;
	if (time_before(n->used, n->confirmed))
		n->used = n->confirmed;
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_parms_qlen_dec(struct net_device *dev, int family)
{
	struct neigh_parms *p;

	rcu_read_lock();
	p = neigh_get_dev_parms_rcu(dev, family);
	if (p)
		p->qlen--;
	rcu_read_unlock();
}

static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
			       int family)
{
	struct sk_buff_head tmp;
	unsigned long flags;
	struct sk_buff *skb;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		struct sk_buff *skb_next = skb_peek_next(skb, list);
		struct net_device *dev = skb->dev;

		if (net == NULL || net_eq(dev_net(dev), net)) {
			neigh_parms_qlen_dec(dev, family);
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		skb = skb_next;
	}
	spin_unlock_irqrestore(&list->lock, flags);

	while ((skb = __skb_dequeue(&tmp))) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	struct hlist_head *dev_head;
	struct hlist_node *tmp;
	struct neighbour *n;

	dev_head = neigh_get_dev_table(dev, tbl->family);

	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
		if (skip_perm && n->nud_state & NUD_PERMANENT)
			continue;

		hlist_del_rcu(&n->hash);
		hlist_del_rcu(&n->dev_list);
		write_lock(&n->lock);
		neigh_del_timer(n);
		neigh_mark_dead(n);
		if (refcount_read(&n->refcnt) != 1) {
			/* The most unpleasant situation.
			 * We must destroy neighbour entry,
			 * but someone still uses it.
			 *
			 * The destroy will be delayed until
			 * the last user releases us, but
			 * we must kill timers etc. and move
			 * it to safe state.
			 */
			__skb_queue_purge(&n->arp_queue);
			n->arp_queue_len_bytes = 0;
			WRITE_ONCE(n->output, neigh_blackhole);
			if (n->nud_state & NUD_VALID)
				n->nud_state = NUD_NOARP;
			else
				n->nud_state = NUD_NONE;
			neigh_dbg(2, "neigh %p is stray\n", n);
		}
		write_unlock(&n->lock);
		neigh_cleanup_and_release(n);
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);
	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
			   tbl->family);
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		del_timer_sync(&tbl->proxy_timer);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
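
/* The three flush entry points differ only in scope: neigh_changeaddr()
 * drops every entry for the device, neigh_carrier_down() keeps
 * NUD_PERMANENT entries (skip_perm == true), and neigh_ifdown() removes
 * everything; the latter two also drain the proxy queue for the device's
 * netns via __neigh_ifdown().
 */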
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries, gc_thresh3;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
	if (entries >= gc_thresh3 ||
	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	n->flags = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct hlist_head);
	struct hlist_head *hash_heads;
	struct neigh_hash_table *ret;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;

	hash_heads = kvzalloc(size, GFP_ATOMIC);
	if (!hash_heads) {
		kfree(ret);
		return NULL;
	}
	ret->hash_heads = hash_heads;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);

	kvfree(nht->hash_heads);
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct hlist_node *tmp;
		struct neighbour *n;

		neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);

			hlist_del_rcu(&n->hash);
			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
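
/* The per-table hash callbacks return full 32-bit values, so
 * "hash >> (32 - hash_shift)" selects the top hash_shift bits as the
 * bucket index; growing the table just consumes one more top bit.
 */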
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u32 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
	if (n->flags & NTF_MANAGED)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
	if (want_ref)
		neigh_hold(n);
	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);

	hlist_add_head_rcu(&n->dev_list,
			   neigh_get_dev_table(dev, tbl->family));

	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);

	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
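
/* Illustration: the hash reads the last four bytes of the key (for IPv4
 * that is the address itself) and folds them down with xor-shifts into a
 * 4-bit bucket index, PNEIGH_HASHMASK = 0xF giving 16 buckets.
 */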
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
				     struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		netdev_put(dev, &n->dev_tracker);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			netdev_put(n->dev, &n->dev_tracker);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		netdev_put(n->dev, &n->dev_tracker);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	netdev_put(dev, &neigh->dev_tracker);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->output);
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neigh_hash_table *nht;
	struct hlist_node *tmp;
	struct neighbour *n;
	unsigned int i;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		WRITE_ONCE(tbl->last_rand, jiffies);
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				continue;
			}

			if (time_before(n->used, n->confirmed) &&
			    time_is_before_eq_jiffies(n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
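
/* Example with the usual ARP defaults (ucast_probes = 3, app_probes = 0,
 * mcast_probes = 3, mcast_reprobes = 0 -- assumed here, not defined in this
 * file): an entry in NUD_INCOMPLETE gives up after 6 probes, while one in
 * NUD_PROBE gives up after 3, because MCAST_REPROBES replaces MCAST_PROBES.
 */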
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		WRITE_ONCE(neigh->nud_state, NUD_FAILED);
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
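
/* In short, the timer drives these transitions:
 *   REACHABLE -> DELAY  (confirmation aged out but the entry was used recently)
 *   REACHABLE -> STALE  (confirmation aged out, no recent use)
 *   DELAY -> REACHABLE  (confirmed within DELAY_PROBE_TIME)
 *   DELAY -> PROBE      (no confirmation; start probing)
 *   PROBE/INCOMPLETE -> FAILED (neigh_max_probes() probes went unanswered)
 */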
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
		       const bool immediate_ok)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
			neigh->updated = now;
			if (!immediate_ok) {
				next = now + 1;
			} else {
				immediate_probe = true;
				next = now + max(NEIGH_VAR(neigh->parms,
							   RETRANS_TIME),
						 HZ / 100);
			}
			neigh_add_timer(neigh, next);
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool gc_update = false, managed_update = false;
	int update_isrouter = 0;
	struct net_device *dev;
	int err, notify = 0;
	u8 old;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;
	err = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
		new = old & ~NUD_PERMANENT;
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		WRITE_ONCE(neigh->nud_state, new);
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			READ_ONCE(n1->output)(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);
	if (((new ^ old) & NUD_PERMANENT) || gc_update)
		neigh_update_gc_list(neigh);
	if (managed_update)
		neigh_update_managed_list(neigh);
	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);
	trace_neigh_update_done(neigh, err);
	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
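
/* These are the methods neigh->output can point at: neigh_resolve_output()
 * is the slow path that may first trigger address resolution,
 * neigh_connected_output() assumes a valid lladdr and only rebuilds the
 * link-layer header, and neigh_direct_output() is for devices that need no
 * resolution at all. neigh_suspect()/neigh_connect() typically switch
 * between the first two via ops->output and ops->connected_output.
 */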
static void neigh_managed_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table,
					       managed_work.work);
	struct neighbour *neigh;

	write_lock_bh(&tbl->lock);
	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
		neigh_event_send_probe(neigh, NULL, false);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
	write_unlock_bh(&tbl->lock);
}

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			neigh_parms_qlen_dec(dev, tbl->family);
			__skb_unlink(skb, &tbl->proxy_queue);

			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

static unsigned long neigh_proxy_delay(struct neigh_parms *p)
{
	/* If proxy_delay is zero, do not call get_random_u32_below()
	 * as it is undefined behavior.
	 */
	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);

	return proxy_delay ?
	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = neigh_proxy_delay(p);

	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	p->qlen++;
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
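
/* Note: the proxy queue keeps a single timer for all pending skbs.
 * pneigh_enqueue() compares the new randomized deadline with a still-pending
 * expiry and keeps whichever comes first, so a short-delay entry is never
 * stuck behind a longer one.
 */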
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		p->qlen = 0;
		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			netdev_put(dev, &p->dev_tracker);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	netdev_put(parms->dev, &parms->dev_tracker);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	INIT_LIST_HEAD(&tbl->managed_list);

	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
	tbl->parms.qlen = 0;

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);

	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);

	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time * 20;

	rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);

/*
 * Only called from ndisc_cleanup(), which means this is dead code
 * because we no longer can unload IPv6 module.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	RCU_INIT_POINTER(neigh_tables[index], NULL);
	synchronize_rcu();

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->managed_work);
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
		break;
	case AF_INET6:
		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	u32 ndm_flags;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	ndm_flags = ndm->ndm_flags;
	if (tb[NDA_FLAGS_EXT]) {
		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);

		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
			      hweight32(NTF_EXT_MASK)));
		ndm_flags |= (ext << NTF_EXT_SHIFT);
	}
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		if (ndm_flags & NTF_MANAGED) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
			goto out;
		}

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
		bool exempt_from_gc = ndm_permanent ||
				      ndm_flags & NTF_EXT_LEARNED;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}
		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
			err = -EINVAL;
			goto out;
		}

		neigh = ___neigh_create(tbl, dst, dev,
					ndm_flags &
					(NTF_EXT_LEARNED | NTF_MANAGED),
					exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;
	if (ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
	if (ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;
	if (ndm_flags & NTF_MANAGED)
		flags |= NEIGH_UPDATE_F_MANAGED;
	if (ndm_flags & NTF_USE)
		flags |= NEIGH_UPDATE_F_USE;

	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
			     NETLINK_CB(skb).portid, extack);
	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
		neigh_event_send(neigh, NULL);
		err = 0;
	}
	neigh_release(neigh);
out:
	return err;
}

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
			  NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		long flush_delta = now - READ_ONCE(tbl->last_flush);
		long rand_delta = now - READ_ONCE(tbl->last_rand);
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
		};

		rcu_read_lock();
		nht = rcu_dereference(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
			ndst.ndts_hits			+= READ_ONCE(st->hits);
			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2229 neightbl_fill_parms(skb, parms) < 0)
2230 goto errout;
2232 read_unlock_bh(&tbl->lock);
2233 nlmsg_end(skb, nlh);
2234 return 0;
2235 errout:
2236 read_unlock_bh(&tbl->lock);
2237 nlmsg_cancel(skb, nlh);
2238 return -EMSGSIZE;
2241 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2242 [NDTA_NAME] = { .type = NLA_STRING },
2243 [NDTA_THRESH1] = { .type = NLA_U32 },
2244 [NDTA_THRESH2] = { .type = NLA_U32 },
2245 [NDTA_THRESH3] = { .type = NLA_U32 },
2246 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2247 [NDTA_PARMS] = { .type = NLA_NESTED },
2250 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2251 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2252 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2253 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2254 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2255 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2256 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2257 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2258 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2259 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2260 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2261 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2262 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2263 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2264 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
2265 [NDTPA_INTERVAL_PROBE_TIME_MS] = { .type = NLA_U64, .min = 1 },
2268 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2269 struct netlink_ext_ack *extack)
2271 struct net *net = sock_net(skb->sk);
2272 struct neigh_table *tbl;
2273 struct ndtmsg *ndtmsg;
2274 struct nlattr *tb[NDTA_MAX+1];
2275 bool found = false;
2276 int err, tidx;
2278 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2279 nl_neightbl_policy, extack);
2280 if (err < 0)
2281 goto errout;
2283 if (tb[NDTA_NAME] == NULL) {
2284 err = -EINVAL;
2285 goto errout;
2288 ndtmsg = nlmsg_data(nlh);
2290 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2291 tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2292 if (!tbl)
2293 continue;
2294 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2295 continue;
2296 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2297 found = true;
2298 break;
2302 if (!found)
2303 return -ENOENT;
2305 /*
2306 * We acquire tbl->lock to be nice to the periodic timers and
2307 * make sure they always see a consistent set of values.
2308 */
2309 write_lock_bh(&tbl->lock);
2311 if (tb[NDTA_PARMS]) {
2312 struct nlattr *tbp[NDTPA_MAX+1];
2313 struct neigh_parms *p;
2314 int i, ifindex = 0;
2316 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2317 tb[NDTA_PARMS],
2318 nl_ntbl_parm_policy, extack);
2319 if (err < 0)
2320 goto errout_tbl_lock;
2322 if (tbp[NDTPA_IFINDEX])
2323 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2325 p = lookup_neigh_parms(tbl, net, ifindex);
2326 if (p == NULL) {
2327 err = -ENOENT;
2328 goto errout_tbl_lock;
2331 for (i = 1; i <= NDTPA_MAX; i++) {
2332 if (tbp[i] == NULL)
2333 continue;
2335 switch (i) {
2336 case NDTPA_QUEUE_LEN:
2337 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2338 nla_get_u32(tbp[i]) *
2339 SKB_TRUESIZE(ETH_FRAME_LEN));
2340 break;
2341 case NDTPA_QUEUE_LENBYTES:
2342 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2343 nla_get_u32(tbp[i]));
2344 break;
2345 case NDTPA_PROXY_QLEN:
2346 NEIGH_VAR_SET(p, PROXY_QLEN,
2347 nla_get_u32(tbp[i]));
2348 break;
2349 case NDTPA_APP_PROBES:
2350 NEIGH_VAR_SET(p, APP_PROBES,
2351 nla_get_u32(tbp[i]));
2352 break;
2353 case NDTPA_UCAST_PROBES:
2354 NEIGH_VAR_SET(p, UCAST_PROBES,
2355 nla_get_u32(tbp[i]));
2356 break;
2357 case NDTPA_MCAST_PROBES:
2358 NEIGH_VAR_SET(p, MCAST_PROBES,
2359 nla_get_u32(tbp[i]));
2360 break;
2361 case NDTPA_MCAST_REPROBES:
2362 NEIGH_VAR_SET(p, MCAST_REPROBES,
2363 nla_get_u32(tbp[i]));
2364 break;
2365 case NDTPA_BASE_REACHABLE_TIME:
2366 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2367 nla_get_msecs(tbp[i]));
2368 /* update reachable_time as well, otherwise, the change will
2369 * only be effective after the next time neigh_periodic_work
2370 * decides to recompute it (can be multiple minutes)
2371 */
2372 p->reachable_time =
2373 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2374 break;
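/* Worked example for the refresh above (sketch): with
 * base_reachable_time_ms written as 30000, neigh_rand_reach_time()
 * re-derives reachable_time somewhere between half and one-and-a-half
 * times that base, i.e. in [15s, 45s], and it takes effect immediately
 * instead of on the next periodic recompute.
 */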
2375 case NDTPA_GC_STALETIME:
2376 NEIGH_VAR_SET(p, GC_STALETIME,
2377 nla_get_msecs(tbp[i]));
2378 break;
2379 case NDTPA_DELAY_PROBE_TIME:
2380 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2381 nla_get_msecs(tbp[i]));
2382 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2383 break;
2384 case NDTPA_INTERVAL_PROBE_TIME_MS:
2385 NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2386 nla_get_msecs(tbp[i]));
2387 break;
2388 case NDTPA_RETRANS_TIME:
2389 NEIGH_VAR_SET(p, RETRANS_TIME,
2390 nla_get_msecs(tbp[i]));
2391 break;
2392 case NDTPA_ANYCAST_DELAY:
2393 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2394 nla_get_msecs(tbp[i]));
2395 break;
2396 case NDTPA_PROXY_DELAY:
2397 NEIGH_VAR_SET(p, PROXY_DELAY,
2398 nla_get_msecs(tbp[i]));
2399 break;
2400 case NDTPA_LOCKTIME:
2401 NEIGH_VAR_SET(p, LOCKTIME,
2402 nla_get_msecs(tbp[i]));
2403 break;
2408 err = -ENOENT;
2409 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2410 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2411 !net_eq(net, &init_net))
2412 goto errout_tbl_lock;
2414 if (tb[NDTA_THRESH1])
2415 WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
2417 if (tb[NDTA_THRESH2])
2418 WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
2420 if (tb[NDTA_THRESH3])
2421 WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
2423 if (tb[NDTA_GC_INTERVAL])
2424 WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
2426 err = 0;
2428 errout_tbl_lock:
2429 write_unlock_bh(&tbl->lock);
2430 errout:
2431 return err;
2434 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2435 struct netlink_ext_ack *extack)
2437 struct ndtmsg *ndtm;
2439 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2440 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2441 return -EINVAL;
2444 ndtm = nlmsg_data(nlh);
2445 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2446 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2447 return -EINVAL;
2450 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2451 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2452 return -EINVAL;
2455 return 0;
2458 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2460 const struct nlmsghdr *nlh = cb->nlh;
2461 struct net *net = sock_net(skb->sk);
2462 int family, tidx, nidx = 0;
2463 int tbl_skip = cb->args[0];
2464 int neigh_skip = cb->args[1];
2465 struct neigh_table *tbl;
2467 if (cb->strict_check) {
2468 int err = neightbl_valid_dump_info(nlh, cb->extack);
2470 if (err < 0)
2471 return err;
2474 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2476 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2477 struct neigh_parms *p;
2479 tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2480 if (!tbl)
2481 continue;
2483 if (tidx < tbl_skip || (family && tbl->family != family))
2484 continue;
2486 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2487 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2488 NLM_F_MULTI) < 0)
2489 break;
2491 nidx = 0;
2492 p = list_next_entry(&tbl->parms, list);
2493 list_for_each_entry_from(p, &tbl->parms_list, list) {
2494 if (!net_eq(neigh_parms_net(p), net))
2495 continue;
2497 if (nidx < neigh_skip)
2498 goto next;
2500 if (neightbl_fill_param_info(skb, tbl, p,
2501 NETLINK_CB(cb->skb).portid,
2502 nlh->nlmsg_seq,
2503 RTM_NEWNEIGHTBL,
2504 NLM_F_MULTI) < 0)
2505 goto out;
2506 next:
2507 nidx++;
2510 neigh_skip = 0;
2512 out:
2513 cb->args[0] = tidx;
2514 cb->args[1] = nidx;
2516 return skb->len;
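/* Illustrative consumer (assuming stock iproute2 behaviour): "ip ntable
 * show" drives this dump with RTM_GETNEIGHTBL and renders one block per
 * table followed by its per-device parms.
 */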
2519 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2520 u32 pid, u32 seq, int type, unsigned int flags)
2522 u32 neigh_flags, neigh_flags_ext;
2523 unsigned long now = jiffies;
2524 struct nda_cacheinfo ci;
2525 struct nlmsghdr *nlh;
2526 struct ndmsg *ndm;
2528 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2529 if (nlh == NULL)
2530 return -EMSGSIZE;
2532 neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2533 neigh_flags = neigh->flags & NTF_OLD_MASK;
2535 ndm = nlmsg_data(nlh);
2536 ndm->ndm_family = neigh->ops->family;
2537 ndm->ndm_pad1 = 0;
2538 ndm->ndm_pad2 = 0;
2539 ndm->ndm_flags = neigh_flags;
2540 ndm->ndm_type = neigh->type;
2541 ndm->ndm_ifindex = neigh->dev->ifindex;
2543 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2544 goto nla_put_failure;
2546 read_lock_bh(&neigh->lock);
2547 ndm->ndm_state = neigh->nud_state;
2548 if (neigh->nud_state & NUD_VALID) {
2549 char haddr[MAX_ADDR_LEN];
2551 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2552 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2553 read_unlock_bh(&neigh->lock);
2554 goto nla_put_failure;
2558 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2559 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2560 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2561 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2562 read_unlock_bh(&neigh->lock);
2564 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2565 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2566 goto nla_put_failure;
2568 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2569 goto nla_put_failure;
2570 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2571 goto nla_put_failure;
2573 nlmsg_end(skb, nlh);
2574 return 0;
2576 nla_put_failure:
2577 nlmsg_cancel(skb, nlh);
2578 return -EMSGSIZE;
2581 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2582 u32 pid, u32 seq, int type, unsigned int flags,
2583 struct neigh_table *tbl)
2585 u32 neigh_flags, neigh_flags_ext;
2586 struct nlmsghdr *nlh;
2587 struct ndmsg *ndm;
2589 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2590 if (nlh == NULL)
2591 return -EMSGSIZE;
2593 neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
2594 neigh_flags = pn->flags & NTF_OLD_MASK;
2596 ndm = nlmsg_data(nlh);
2597 ndm->ndm_family = tbl->family;
2598 ndm->ndm_pad1 = 0;
2599 ndm->ndm_pad2 = 0;
2600 ndm->ndm_flags = neigh_flags | NTF_PROXY;
2601 ndm->ndm_type = RTN_UNICAST;
2602 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2603 ndm->ndm_state = NUD_NONE;
2605 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2606 goto nla_put_failure;
2608 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2609 goto nla_put_failure;
2610 if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2611 goto nla_put_failure;
2613 nlmsg_end(skb, nlh);
2614 return 0;
2616 nla_put_failure:
2617 nlmsg_cancel(skb, nlh);
2618 return -EMSGSIZE;
2621 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2623 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2624 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2627 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2629 struct net_device *master;
2631 if (!master_idx)
2632 return false;
2634 master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
2636 /* 0 is already used to denote that NDA_MASTER wasn't passed, so we need
2637 * another invalid ifindex value to denote "no master".
2638 */
2639 if (master_idx == -1)
2640 return !!master;
2642 if (!master || master->ifindex != master_idx)
2643 return true;
2645 return false;
2648 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2650 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2651 return true;
2653 return false;
2656 struct neigh_dump_filter {
2657 int master_idx;
2658 int dev_idx;
2661 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2662 struct netlink_callback *cb,
2663 struct neigh_dump_filter *filter)
2665 struct net *net = sock_net(skb->sk);
2666 struct neighbour *n;
2667 int err = 0, h, s_h = cb->args[1];
2668 int idx, s_idx = idx = cb->args[2];
2669 struct neigh_hash_table *nht;
2670 unsigned int flags = NLM_F_MULTI;
2672 if (filter->dev_idx || filter->master_idx)
2673 flags |= NLM_F_DUMP_FILTERED;
2675 nht = rcu_dereference(tbl->nht);
2677 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2678 if (h > s_h)
2679 s_idx = 0;
2680 idx = 0;
2681 neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
2682 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2683 goto next;
2684 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2685 neigh_master_filtered(n->dev, filter->master_idx))
2686 goto next;
2687 err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2688 cb->nlh->nlmsg_seq,
2689 RTM_NEWNEIGH, flags);
2690 if (err < 0)
2691 goto out;
2692 next:
2693 idx++;
2696 out:
2697 cb->args[1] = h;
2698 cb->args[2] = idx;
2699 return err;
2702 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2703 struct netlink_callback *cb,
2704 struct neigh_dump_filter *filter)
2706 struct pneigh_entry *n;
2707 struct net *net = sock_net(skb->sk);
2708 int err = 0, h, s_h = cb->args[3];
2709 int idx, s_idx = idx = cb->args[4];
2710 unsigned int flags = NLM_F_MULTI;
2712 if (filter->dev_idx || filter->master_idx)
2713 flags |= NLM_F_DUMP_FILTERED;
2715 read_lock_bh(&tbl->lock);
2717 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2718 if (h > s_h)
2719 s_idx = 0;
2720 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2721 if (idx < s_idx || pneigh_net(n) != net)
2722 goto next;
2723 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2724 neigh_master_filtered(n->dev, filter->master_idx))
2725 goto next;
2726 err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2727 cb->nlh->nlmsg_seq,
2728 RTM_NEWNEIGH, flags, tbl);
2729 if (err < 0) {
2730 read_unlock_bh(&tbl->lock);
2731 goto out;
2733 next:
2734 idx++;
2738 read_unlock_bh(&tbl->lock);
2739 out:
2740 cb->args[3] = h;
2741 cb->args[4] = idx;
2742 return err;
2745 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2746 bool strict_check,
2747 struct neigh_dump_filter *filter,
2748 struct netlink_ext_ack *extack)
2750 struct nlattr *tb[NDA_MAX + 1];
2751 int err, i;
2753 if (strict_check) {
2754 struct ndmsg *ndm;
2756 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2757 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2758 return -EINVAL;
2761 ndm = nlmsg_data(nlh);
2762 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2763 ndm->ndm_state || ndm->ndm_type) {
2764 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2765 return -EINVAL;
2768 if (ndm->ndm_flags & ~NTF_PROXY) {
2769 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2770 return -EINVAL;
2773 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2774 tb, NDA_MAX, nda_policy,
2775 extack);
2776 } else {
2777 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2778 NDA_MAX, nda_policy, extack);
2780 if (err < 0)
2781 return err;
2783 for (i = 0; i <= NDA_MAX; ++i) {
2784 if (!tb[i])
2785 continue;
2787 /* all new attributes should require strict_check */
2788 switch (i) {
2789 case NDA_IFINDEX:
2790 filter->dev_idx = nla_get_u32(tb[i]);
2791 break;
2792 case NDA_MASTER:
2793 filter->master_idx = nla_get_u32(tb[i]);
2794 break;
2795 default:
2796 if (strict_check) {
2797 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2798 return -EINVAL;
2803 return 0;
2806 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2808 const struct nlmsghdr *nlh = cb->nlh;
2809 struct neigh_dump_filter filter = {};
2810 struct neigh_table *tbl;
2811 int t, family, s_t;
2812 int proxy = 0;
2813 int err;
2815 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2817 /* check for presence of a full ndmsg structure; the family member is
2818 * at the same offset in both ndmsg and rtgenmsg
2819 */
2820 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2821 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2822 proxy = 1;
2824 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2825 if (err < 0 && cb->strict_check)
2826 return err;
2827 err = 0;
2829 s_t = cb->args[0];
2831 rcu_read_lock();
2832 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2833 tbl = rcu_dereference(neigh_tables[t]);
2835 if (!tbl)
2836 continue;
2837 if (t < s_t || (family && tbl->family != family))
2838 continue;
2839 if (t > s_t)
2840 memset(&cb->args[1], 0, sizeof(cb->args) -
2841 sizeof(cb->args[0]));
2842 if (proxy)
2843 err = pneigh_dump_table(tbl, skb, cb, &filter);
2844 else
2845 err = neigh_dump_table(tbl, skb, cb, &filter);
2846 if (err < 0)
2847 break;
2849 rcu_read_unlock();
2851 cb->args[0] = t;
2852 return err;
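/* Sketch of the continuation contract used above: cb->args[0] holds the
 * table index, neigh_dump_table() keeps its bucket and in-bucket offset in
 * args[1]/args[2], and pneigh_dump_table() uses args[3]/args[4], so a
 * multi-part dump resumes exactly where the previous skb filled up.
 */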
2855 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2856 struct neigh_table **tbl,
2857 void **dst, int *dev_idx, u8 *ndm_flags,
2858 struct netlink_ext_ack *extack)
2860 struct nlattr *tb[NDA_MAX + 1];
2861 struct ndmsg *ndm;
2862 int err, i;
2864 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2865 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2866 return -EINVAL;
2869 ndm = nlmsg_data(nlh);
2870 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2871 ndm->ndm_type) {
2872 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2873 return -EINVAL;
2876 if (ndm->ndm_flags & ~NTF_PROXY) {
2877 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2878 return -EINVAL;
2881 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2882 NDA_MAX, nda_policy, extack);
2883 if (err < 0)
2884 return err;
2886 *ndm_flags = ndm->ndm_flags;
2887 *dev_idx = ndm->ndm_ifindex;
2888 *tbl = neigh_find_table(ndm->ndm_family);
2889 if (*tbl == NULL) {
2890 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2891 return -EAFNOSUPPORT;
2894 for (i = 0; i <= NDA_MAX; ++i) {
2895 if (!tb[i])
2896 continue;
2898 switch (i) {
2899 case NDA_DST:
2900 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2901 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2902 return -EINVAL;
2904 *dst = nla_data(tb[i]);
2905 break;
2906 default:
2907 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2908 return -EINVAL;
2912 return 0;
2915 static inline size_t neigh_nlmsg_size(void)
2917 return NLMSG_ALIGN(sizeof(struct ndmsg))
2918 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2919 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2920 + nla_total_size(sizeof(struct nda_cacheinfo))
2921 + nla_total_size(4) /* NDA_PROBES */
2922 + nla_total_size(4) /* NDA_FLAGS_EXT */
2923 + nla_total_size(1); /* NDA_PROTOCOL */
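/* Worked example (sketch, assuming the usual UAPI sizes: sizeof(struct
 * ndmsg) == 12, MAX_ADDR_LEN == 32, sizeof(struct nda_cacheinfo) == 16):
 * since nla_total_size(n) is NLA_ALIGN(4 + n), this books
 * 12 + 36 + 36 + 20 + 8 + 8 + 8 = 128 bytes per message.
 */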
2926 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2927 u32 pid, u32 seq)
2929 struct sk_buff *skb;
2930 int err = 0;
2932 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2933 if (!skb)
2934 return -ENOBUFS;
2936 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2937 if (err) {
2938 kfree_skb(skb);
2939 goto errout;
2942 err = rtnl_unicast(skb, net, pid);
2943 errout:
2944 return err;
2947 static inline size_t pneigh_nlmsg_size(void)
2949 return NLMSG_ALIGN(sizeof(struct ndmsg))
2950 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2951 + nla_total_size(4) /* NDA_FLAGS_EXT */
2952 + nla_total_size(1); /* NDA_PROTOCOL */
2955 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2956 u32 pid, u32 seq, struct neigh_table *tbl)
2958 struct sk_buff *skb;
2959 int err = 0;
2961 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2962 if (!skb)
2963 return -ENOBUFS;
2965 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2966 if (err) {
2967 kfree_skb(skb);
2968 goto errout;
2971 err = rtnl_unicast(skb, net, pid);
2972 errout:
2973 return err;
2976 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2977 struct netlink_ext_ack *extack)
2979 struct net *net = sock_net(in_skb->sk);
2980 struct net_device *dev = NULL;
2981 struct neigh_table *tbl = NULL;
2982 struct neighbour *neigh;
2983 void *dst = NULL;
2984 u8 ndm_flags = 0;
2985 int dev_idx = 0;
2986 int err;
2988 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2989 extack);
2990 if (err < 0)
2991 return err;
2993 if (dev_idx) {
2994 dev = __dev_get_by_index(net, dev_idx);
2995 if (!dev) {
2996 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2997 return -ENODEV;
3001 if (!dst) {
3002 NL_SET_ERR_MSG(extack, "Network address not specified");
3003 return -EINVAL;
3006 if (ndm_flags & NTF_PROXY) {
3007 struct pneigh_entry *pn;
3009 pn = pneigh_lookup(tbl, net, dst, dev, 0);
3010 if (!pn) {
3011 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3012 return -ENOENT;
3014 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
3015 nlh->nlmsg_seq, tbl);
3018 if (!dev) {
3019 NL_SET_ERR_MSG(extack, "No device specified");
3020 return -EINVAL;
3023 neigh = neigh_lookup(tbl, dst, dev);
3024 if (!neigh) {
3025 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3026 return -ENOENT;
3029 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
3030 nlh->nlmsg_seq);
3032 neigh_release(neigh);
3034 return err;
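/* For example (illustrative): "ip neigh get 192.0.2.1 dev eth0" arrives
 * here as RTM_GETNEIGH with NDA_DST and ndm_ifindex set and is answered
 * from neigh_lookup(); a request with NTF_PROXY in ndm_flags is answered
 * from the proxy table via pneigh_lookup() instead.
 */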
3037 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3039 int chain;
3040 struct neigh_hash_table *nht;
3042 rcu_read_lock();
3043 nht = rcu_dereference(tbl->nht);
3045 read_lock_bh(&tbl->lock); /* avoid resizes */
3046 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3047 struct neighbour *n;
3049 neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
3050 cb(n, cookie);
3052 read_unlock_bh(&tbl->lock);
3053 rcu_read_unlock();
3055 EXPORT_SYMBOL(neigh_for_each);
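/* Usage sketch (not compiled, hypothetical caller): count currently
 * reachable entries in the IPv4 ARP table with a plain callback.
 */
#if 0
#include <net/arp.h>	/* arp_tbl */

static void count_reachable(struct neighbour *n, void *cookie)
{
	if (READ_ONCE(n->nud_state) & NUD_REACHABLE)
		(*(unsigned int *)cookie)++;
}

static unsigned int arp_count_reachable(void)
{
	unsigned int cnt = 0;

	neigh_for_each(&arp_tbl, count_reachable, &cnt);
	return cnt;
}
#endif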
3057 /* The tbl->lock must be held as a writer and BH disabled. */
3058 void __neigh_for_each_release(struct neigh_table *tbl,
3059 int (*cb)(struct neighbour *))
3061 struct neigh_hash_table *nht;
3062 int chain;
3064 nht = rcu_dereference_protected(tbl->nht,
3065 lockdep_is_held(&tbl->lock));
3066 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3067 struct hlist_node *tmp;
3068 struct neighbour *n;
3070 neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
3071 int release;
3073 write_lock(&n->lock);
3074 release = cb(n);
3075 if (release) {
3076 hlist_del_rcu(&n->hash);
3077 hlist_del_rcu(&n->dev_list);
3078 neigh_mark_dead(n);
3080 write_unlock(&n->lock);
3081 if (release)
3082 neigh_cleanup_and_release(n);
3086 EXPORT_SYMBOL(__neigh_for_each_release);
3088 int neigh_xmit(int index, struct net_device *dev,
3089 const void *addr, struct sk_buff *skb)
3091 int err = -EAFNOSUPPORT;
3093 if (likely(index < NEIGH_NR_TABLES)) {
3094 struct neigh_table *tbl;
3095 struct neighbour *neigh;
3097 rcu_read_lock();
3098 tbl = rcu_dereference(neigh_tables[index]);
3099 if (!tbl)
3100 goto out_unlock;
3101 if (index == NEIGH_ARP_TABLE) {
3102 u32 key = *((u32 *)addr);
3104 neigh = __ipv4_neigh_lookup_noref(dev, key);
3105 } else {
3106 neigh = __neigh_lookup_noref(tbl, addr, dev);
3108 if (!neigh)
3109 neigh = __neigh_create(tbl, addr, dev, false);
3110 err = PTR_ERR(neigh);
3111 if (IS_ERR(neigh)) {
3112 rcu_read_unlock();
3113 goto out_kfree_skb;
3115 err = READ_ONCE(neigh->output)(neigh, skb);
3116 out_unlock:
3117 rcu_read_unlock();
3119 else if (index == NEIGH_LINK_TABLE) {
3120 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3121 addr, NULL, skb->len);
3122 if (err < 0)
3123 goto out_kfree_skb;
3124 err = dev_queue_xmit(skb);
3126 out:
3127 return err;
3128 out_kfree_skb:
3129 kfree_skb(skb);
3130 goto out;
3132 EXPORT_SYMBOL(neigh_xmit);
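/* Usage sketch (not compiled, hypothetical function name): resolve and
 * transmit through the ARP table, roughly how the MPLS output path uses
 * this helper; dev and skb are assumed to be prepared by the caller.
 */
#if 0
static int xmit_via_arp(struct net_device *dev, struct sk_buff *skb)
{
	__be32 daddr = htonl(0xc0000201);	/* 192.0.2.1, example next hop */

	/* resolves via arp_tbl if needed, then calls neigh->output() */
	return neigh_xmit(NEIGH_ARP_TABLE, dev, &daddr, skb);
}
#endif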
3134 #ifdef CONFIG_PROC_FS
3136 static struct neighbour *neigh_get_valid(struct seq_file *seq,
3137 struct neighbour *n,
3138 loff_t *pos)
3140 struct neigh_seq_state *state = seq->private;
3141 struct net *net = seq_file_net(seq);
3143 if (!net_eq(dev_net(n->dev), net))
3144 return NULL;
3146 if (state->neigh_sub_iter) {
3147 loff_t fakep = 0;
3148 void *v;
3150 v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
3151 if (!v)
3152 return NULL;
3153 if (pos)
3154 return v;
3157 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3158 return n;
3160 if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
3161 return n;
3163 return NULL;
3166 static struct neighbour *neigh_get_first(struct seq_file *seq)
3168 struct neigh_seq_state *state = seq->private;
3169 struct neigh_hash_table *nht = state->nht;
3170 struct neighbour *n, *tmp;
3172 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3174 while (++state->bucket < (1 << nht->hash_shift)) {
3175 neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
3176 tmp = neigh_get_valid(seq, n, NULL);
3177 if (tmp)
3178 return tmp;
3182 return NULL;
3185 static struct neighbour *neigh_get_next(struct seq_file *seq,
3186 struct neighbour *n,
3187 loff_t *pos)
3189 struct neigh_seq_state *state = seq->private;
3190 struct neighbour *tmp;
3192 if (state->neigh_sub_iter) {
3193 void *v = state->neigh_sub_iter(state, n, pos);
3195 if (v)
3196 return n;
3199 hlist_for_each_entry_continue(n, hash) {
3200 tmp = neigh_get_valid(seq, n, pos);
3201 if (tmp) {
3202 n = tmp;
3203 goto out;
3207 n = neigh_get_first(seq);
3208 out:
3209 if (n && pos)
3210 --(*pos);
3212 return n;
3215 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3217 struct neighbour *n = neigh_get_first(seq);
3219 if (n) {
3220 --(*pos);
3221 while (*pos) {
3222 n = neigh_get_next(seq, n, pos);
3223 if (!n)
3224 break;
3227 return *pos ? NULL : n;
3230 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3232 struct neigh_seq_state *state = seq->private;
3233 struct net *net = seq_file_net(seq);
3234 struct neigh_table *tbl = state->tbl;
3235 struct pneigh_entry *pn = NULL;
3236 int bucket;
3238 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3239 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3240 pn = tbl->phash_buckets[bucket];
3241 while (pn && !net_eq(pneigh_net(pn), net))
3242 pn = pn->next;
3243 if (pn)
3244 break;
3246 state->bucket = bucket;
3248 return pn;
3251 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3252 struct pneigh_entry *pn,
3253 loff_t *pos)
3255 struct neigh_seq_state *state = seq->private;
3256 struct net *net = seq_file_net(seq);
3257 struct neigh_table *tbl = state->tbl;
3259 do {
3260 pn = pn->next;
3261 } while (pn && !net_eq(pneigh_net(pn), net));
3263 while (!pn) {
3264 if (++state->bucket > PNEIGH_HASHMASK)
3265 break;
3266 pn = tbl->phash_buckets[state->bucket];
3267 while (pn && !net_eq(pneigh_net(pn), net))
3268 pn = pn->next;
3269 if (pn)
3270 break;
3273 if (pn && pos)
3274 --(*pos);
3276 return pn;
3279 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3281 struct pneigh_entry *pn = pneigh_get_first(seq);
3283 if (pn) {
3284 --(*pos);
3285 while (*pos) {
3286 pn = pneigh_get_next(seq, pn, pos);
3287 if (!pn)
3288 break;
3291 return *pos ? NULL : pn;
3294 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3296 struct neigh_seq_state *state = seq->private;
3297 void *rc;
3298 loff_t idxpos = *pos;
3300 rc = neigh_get_idx(seq, &idxpos);
3301 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3302 rc = pneigh_get_idx(seq, &idxpos);
3304 return rc;
3307 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3308 __acquires(tbl->lock)
3309 __acquires(rcu)
3311 struct neigh_seq_state *state = seq->private;
3313 state->tbl = tbl;
3314 state->bucket = -1;
3315 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3317 rcu_read_lock();
3318 state->nht = rcu_dereference(tbl->nht);
3319 read_lock_bh(&tbl->lock);
3321 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3323 EXPORT_SYMBOL(neigh_seq_start);
3325 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3327 struct neigh_seq_state *state;
3328 void *rc;
3330 if (v == SEQ_START_TOKEN) {
3331 rc = neigh_get_first(seq);
3332 goto out;
3335 state = seq->private;
3336 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3337 rc = neigh_get_next(seq, v, NULL);
3338 if (rc)
3339 goto out;
3340 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3341 rc = pneigh_get_first(seq);
3342 } else {
3343 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3344 rc = pneigh_get_next(seq, v, NULL);
3346 out:
3347 ++(*pos);
3348 return rc;
3350 EXPORT_SYMBOL(neigh_seq_next);
3352 void neigh_seq_stop(struct seq_file *seq, void *v)
3353 __releases(tbl->lock)
3354 __releases(rcu)
3356 struct neigh_seq_state *state = seq->private;
3357 struct neigh_table *tbl = state->tbl;
3359 read_unlock_bh(&tbl->lock);
3360 rcu_read_unlock();
3362 EXPORT_SYMBOL(neigh_seq_stop);
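/* Usage sketch (not compiled, hypothetical names): a /proc walker in the
 * style of /proc/net/arp, where seq->private is a struct neigh_seq_state.
 */
#if 0
static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* skip NUD_NOARP entries, as the ARP table walker does */
	return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
}
#endif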
3364 /* statistics via seq_file */
3366 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3368 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3369 int cpu;
3371 if (*pos == 0)
3372 return SEQ_START_TOKEN;
3374 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3375 if (!cpu_possible(cpu))
3376 continue;
3377 *pos = cpu+1;
3378 return per_cpu_ptr(tbl->stats, cpu);
3380 return NULL;
3383 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3385 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3386 int cpu;
3388 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3389 if (!cpu_possible(cpu))
3390 continue;
3391 *pos = cpu+1;
3392 return per_cpu_ptr(tbl->stats, cpu);
3394 (*pos)++;
3395 return NULL;
3398 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3399 {
3401 }
3403 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3405 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3406 struct neigh_statistics *st = v;
3408 if (v == SEQ_START_TOKEN) {
3409 seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3410 return 0;
3413 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3414 "%08lx %08lx %08lx "
3415 "%08lx %08lx %08lx\n",
3416 atomic_read(&tbl->entries),
3418 st->allocs,
3419 st->destroys,
3420 st->hash_grows,
3422 st->lookups,
3423 st->hits,
3425 st->res_failed,
3427 st->rcv_probes_mcast,
3428 st->rcv_probes_ucast,
3430 st->periodic_gc_runs,
3431 st->forced_gc_runs,
3432 st->unres_discards,
3433 st->table_fulls
3436 return 0;
3439 static const struct seq_operations neigh_stat_seq_ops = {
3440 .start = neigh_stat_seq_start,
3441 .next = neigh_stat_seq_next,
3442 .stop = neigh_stat_seq_stop,
3443 .show = neigh_stat_seq_show,
3445 #endif /* CONFIG_PROC_FS */
3447 static void __neigh_notify(struct neighbour *n, int type, int flags,
3448 u32 pid)
3450 struct net *net = dev_net(n->dev);
3451 struct sk_buff *skb;
3452 int err = -ENOBUFS;
3454 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3455 if (skb == NULL)
3456 goto errout;
3458 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3459 if (err < 0) {
3460 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3461 WARN_ON(err == -EMSGSIZE);
3462 kfree_skb(skb);
3463 goto errout;
3465 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3466 return;
3467 errout:
3468 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3471 void neigh_app_ns(struct neighbour *n)
3473 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3475 EXPORT_SYMBOL(neigh_app_ns);
3477 #ifdef CONFIG_SYSCTL
3478 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3480 static int proc_unres_qlen(const struct ctl_table *ctl, int write,
3481 void *buffer, size_t *lenp, loff_t *ppos)
3483 int size, ret;
3484 struct ctl_table tmp = *ctl;
3486 tmp.extra1 = SYSCTL_ZERO;
3487 tmp.extra2 = &unres_qlen_max;
3488 tmp.data = &size;
3490 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3491 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3493 if (write && !ret)
3494 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3495 return ret;
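/* Note (illustrative): the packet-count view is derived on the fly, e.g.
 * writing unres_qlen=101 stores 101 * SKB_TRUESIZE(ETH_FRAME_LEN) bytes in
 * QUEUE_LEN_BYTES and reads back as 101 exactly, while a value written
 * directly to unres_qlen_bytes may read back truncated here.
 */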
3498 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3499 int index)
3501 struct net_device *dev;
3502 int family = neigh_parms_family(p);
3504 rcu_read_lock();
3505 for_each_netdev_rcu(net, dev) {
3506 struct neigh_parms *dst_p =
3507 neigh_get_dev_parms_rcu(dev, family);
3509 if (dst_p && !test_bit(index, dst_p->data_state))
3510 dst_p->data[index] = p->data[index];
3512 rcu_read_unlock();
3515 static void neigh_proc_update(const struct ctl_table *ctl, int write)
3517 struct net_device *dev = ctl->extra1;
3518 struct neigh_parms *p = ctl->extra2;
3519 struct net *net = neigh_parms_net(p);
3520 int index = (int *) ctl->data - p->data;
3522 if (!write)
3523 return;
3525 set_bit(index, p->data_state);
3526 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3527 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3528 if (!dev) /* NULL dev means this is the default value */
3529 neigh_copy_dflt_parms(net, p, index);
3532 static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
3533 void *buffer, size_t *lenp,
3534 loff_t *ppos)
3536 struct ctl_table tmp = *ctl;
3537 int ret;
3539 tmp.extra1 = SYSCTL_ZERO;
3540 tmp.extra2 = SYSCTL_INT_MAX;
3542 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3543 neigh_proc_update(ctl, write);
3544 return ret;
3547 static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
3548 void *buffer, size_t *lenp, loff_t *ppos)
3550 struct ctl_table tmp = *ctl;
3551 int ret;
3553 int min = msecs_to_jiffies(1);
3555 tmp.extra1 = &min;
3556 tmp.extra2 = NULL;
3558 ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3559 neigh_proc_update(ctl, write);
3560 return ret;
3563 int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
3564 size_t *lenp, loff_t *ppos)
3566 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3568 neigh_proc_update(ctl, write);
3569 return ret;
3571 EXPORT_SYMBOL(neigh_proc_dointvec);
3573 int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
3574 size_t *lenp, loff_t *ppos)
3576 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3578 neigh_proc_update(ctl, write);
3579 return ret;
3581 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3583 static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
3584 void *buffer, size_t *lenp,
3585 loff_t *ppos)
3587 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3589 neigh_proc_update(ctl, write);
3590 return ret;
3593 int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
3594 void *buffer, size_t *lenp, loff_t *ppos)
3596 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3598 neigh_proc_update(ctl, write);
3599 return ret;
3601 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3603 static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
3604 void *buffer, size_t *lenp,
3605 loff_t *ppos)
3607 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3609 neigh_proc_update(ctl, write);
3610 return ret;
3613 static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
3614 void *buffer, size_t *lenp,
3615 loff_t *ppos)
3617 struct neigh_parms *p = ctl->extra2;
3618 int ret;
3620 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3621 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3622 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3623 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3624 else
3625 ret = -1;
3627 if (write && ret == 0) {
3628 /* update reachable_time as well, otherwise, the change will
3629 * only be effective after the next time neigh_periodic_work
3630 * decides to recompute it
3631 */
3632 p->reachable_time =
3633 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3635 return ret;
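/* For example (sketch): "echo 30 > base_reachable_time" and
 * "echo 30000 > base_reachable_time_ms" under the per-device neigh
 * sysctl directory store the same number of jiffies, and both refresh
 * p->reachable_time immediately here.
 */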
3638 #define NEIGH_PARMS_DATA_OFFSET(index) \
3639 (&((struct neigh_parms *) 0)->data[index])
3641 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3642 [NEIGH_VAR_ ## attr] = { \
3643 .procname = name, \
3644 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3645 .maxlen = sizeof(int), \
3646 .mode = mval, \
3647 .proc_handler = proc, \
3650 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3651 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3653 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3654 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3656 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3657 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3659 #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
3660 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
3662 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3663 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3665 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3666 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3668 static struct neigh_sysctl_table {
3669 struct ctl_table_header *sysctl_header;
3670 struct ctl_table neigh_vars[NEIGH_VAR_MAX];
3671 } neigh_sysctl_template __read_mostly = {
3672 .neigh_vars = {
3673 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3674 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3675 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3676 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3677 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3678 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3679 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3680 NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3681 "interval_probe_time_ms"),
3682 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3683 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3684 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3685 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3686 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3687 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3688 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3689 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3690 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3691 [NEIGH_VAR_GC_INTERVAL] = {
3692 .procname = "gc_interval",
3693 .maxlen = sizeof(int),
3694 .mode = 0644,
3695 .proc_handler = proc_dointvec_jiffies,
3697 [NEIGH_VAR_GC_THRESH1] = {
3698 .procname = "gc_thresh1",
3699 .maxlen = sizeof(int),
3700 .mode = 0644,
3701 .extra1 = SYSCTL_ZERO,
3702 .extra2 = SYSCTL_INT_MAX,
3703 .proc_handler = proc_dointvec_minmax,
3705 [NEIGH_VAR_GC_THRESH2] = {
3706 .procname = "gc_thresh2",
3707 .maxlen = sizeof(int),
3708 .mode = 0644,
3709 .extra1 = SYSCTL_ZERO,
3710 .extra2 = SYSCTL_INT_MAX,
3711 .proc_handler = proc_dointvec_minmax,
3713 [NEIGH_VAR_GC_THRESH3] = {
3714 .procname = "gc_thresh3",
3715 .maxlen = sizeof(int),
3716 .mode = 0644,
3717 .extra1 = SYSCTL_ZERO,
3718 .extra2 = SYSCTL_INT_MAX,
3719 .proc_handler = proc_dointvec_minmax,
3724 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3725 proc_handler *handler)
3727 int i;
3728 struct neigh_sysctl_table *t;
3729 const char *dev_name_source;
3730 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3731 char *p_name;
3732 size_t neigh_vars_size;
3734 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
3735 if (!t)
3736 goto err;
3738 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3739 t->neigh_vars[i].data += (long) p;
3740 t->neigh_vars[i].extra1 = dev;
3741 t->neigh_vars[i].extra2 = p;
3744 neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
3745 if (dev) {
3746 dev_name_source = dev->name;
3747 /* Terminate the table early */
3748 neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
3749 } else {
3750 struct neigh_table *tbl = p->tbl;
3751 dev_name_source = "default";
3752 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3753 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3754 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3755 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3758 if (handler) {
3759 /* RetransTime */
3760 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3761 /* ReachableTime */
3762 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3763 /* RetransTime (in milliseconds) */
3764 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3765 /* ReachableTime (in milliseconds) */
3766 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3767 } else {
3768 /* Those handlers will update p->reachable_time after
3769 * base_reachable_time(_ms) is set, so that the new interval takes effect
3770 * on the next neighbour update instead of waiting for neigh_periodic_work
3771 * to recompute it (which can take multiple minutes).
3772 * Any handler that replaces them should do the same.
3773 */
3774 /* ReachableTime */
3775 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3776 neigh_proc_base_reachable_time;
3777 /* ReachableTime (in milliseconds) */
3778 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3779 neigh_proc_base_reachable_time;
3782 switch (neigh_parms_family(p)) {
3783 case AF_INET:
3784 p_name = "ipv4";
3785 break;
3786 case AF_INET6:
3787 p_name = "ipv6";
3788 break;
3789 default:
3790 BUG();
3793 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3794 p_name, dev_name_source);
3795 t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
3796 neigh_path, t->neigh_vars,
3797 neigh_vars_size);
3798 if (!t->sysctl_header)
3799 goto free;
3801 p->sysctl_table = t;
3802 return 0;
3804 free:
3805 kfree(t);
3806 err:
3807 return -ENOBUFS;
3809 EXPORT_SYMBOL(neigh_sysctl_register);
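/* Usage sketch (not compiled, hypothetical name, modeled on the IPv4
 * side): registering the per-device tree; a NULL handler selects the
 * base_reachable_time handlers installed above. in_dev->arp_parms is
 * assumed valid here.
 */
#if 0
static int foo_register_neigh_sysctls(struct net_device *dev,
				      struct in_device *in_dev)
{
	/* creates net/ipv4/neigh/<dev>/{mcast_solicit,retrans_time,...} */
	return neigh_sysctl_register(dev, in_dev->arp_parms, NULL);
}
#endif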
3811 void neigh_sysctl_unregister(struct neigh_parms *p)
3813 if (p->sysctl_table) {
3814 struct neigh_sysctl_table *t = p->sysctl_table;
3815 p->sysctl_table = NULL;
3816 unregister_net_sysctl_table(t->sysctl_header);
3817 kfree(t);
3820 EXPORT_SYMBOL(neigh_sysctl_unregister);
3822 #endif /* CONFIG_SYSCTL */
3824 static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
3825 {.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
3826 {.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
3827 {.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
3828 .flags = RTNL_FLAG_DUMP_UNLOCKED},
3829 {.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
3830 {.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
3833 static int __init neigh_init(void)
3835 rtnl_register_many(neigh_rtnl_msg_handlers);
3836 return 0;
3839 subsys_initcall(neigh_init);