// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
{
	int i;

	switch (family) {
	default:
		DEBUG_NET_WARN_ON_ONCE(1);
		fallthrough; /* to avoid panic by null-ptr-deref */
	case AF_INET:
		i = NEIGH_ARP_TABLE;
		break;
	case AF_INET6:
		i = NEIGH_ND_TABLE;
		break;
	}

	return &dev->neighbours[i];
}
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     It will result in deadlocks if the backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure that we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
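
/* Editorial sketch (not part of the original file): the canonical pattern the
 * rules above imply — hold tbl->lock only for the scan, pin the entry with a
 * reference, and do any slow work after dropping the lock. do_something_slow()
 * is a hypothetical placeholder.
 */
#if 0
	write_lock_bh(&tbl->lock);
	/* ... locate the entry n in its hash bucket ... */
	neigh_hold(n);			/* pin it before dropping tbl->lock */
	write_unlock_bh(&tbl->lock);

	/* Callbacks/transmissions are now safe: no table lock is held. */
	do_something_slow(n);		/* hypothetical helper */

	neigh_release(n);
#endif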
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? get_random_u32_below(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
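
/* Editorial note: get_random_u32_below(base) returns a value in [0, base), so
 * the result above lies in [base/2, 3*base/2). For example, base = 30 * HZ
 * yields a reachable_time uniformly distributed between 15 and 45 seconds,
 * matching the interval stated in the comment.
 */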
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
	if (!list_empty(&n->managed_list))
		list_del_init(&n->managed_list);
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_managed_list(struct neighbour *n)
{
	bool on_managed_list, add_to_managed;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	add_to_managed = n->flags & NTF_MANAGED;
	on_managed_list = !list_empty(&n->managed_list);

	if (!add_to_managed && on_managed_list)
		list_del_init(&n->managed_list);
	else if (add_to_managed && !on_managed_list)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
			       bool *gc_update, bool *managed_update)
{
	u32 ndm_flags, old_flags = neigh->flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return;

	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;

	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		*notify = 1;
		*gc_update = true;
	}
	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
		if (ndm_flags & NTF_MANAGED)
			neigh->flags |= NTF_MANAGED;
		else
			neigh->flags &= ~NTF_MANAGED;
		*notify = 1;
		*managed_update = true;
	}
}

bool neigh_remove_one(struct neighbour *n)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		hlist_del_rcu(&n->hash);
		hlist_del_rcu(&n->dev_list);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) -
			READ_ONCE(tbl->gc_thresh2);
	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;
	int loop = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    !time_in_range(n->updated, tref, jiffies))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n))
				shrunk++;
			if (shrunk >= max_clean)
				break;
			if (++loop == 16) {
				if (ktime_get_ns() > tmax)
					break;
				loop = 0;
			}
		}
	}

	WRITE_ONCE(tbl->last_flush, jiffies);

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	/* Use safe distance from the jiffies - LONG_MAX point while timer
	 * is running in DELAY/PROBE state but still show to user space
	 * large times in the past.
	 */
	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);

	neigh_hold(n);
	if (!time_in_range(n->confirmed, mint, jiffies))
		n->confirmed = mint;
	if (time_before(n->used, n->confirmed))
		n->used = n->confirmed;
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_parms_qlen_dec(struct net_device *dev, int family)
{
	struct neigh_parms *p;

	rcu_read_lock();
	p = neigh_get_dev_parms_rcu(dev, family);
	if (p)
		p->qlen--;
	rcu_read_unlock();
}

static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
			       int family)
{
	struct sk_buff_head tmp;
	unsigned long flags;
	struct sk_buff *skb;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		struct sk_buff *skb_next = skb_peek_next(skb, list);
		struct net_device *dev = skb->dev;

		if (net == NULL || net_eq(dev_net(dev), net)) {
			neigh_parms_qlen_dec(dev, family);
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		skb = skb_next;
	}
	spin_unlock_irqrestore(&list->lock, flags);

	while ((skb = __skb_dequeue(&tmp))) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	struct hlist_head *dev_head;
	struct hlist_node *tmp;
	struct neighbour *n;

	dev_head = neigh_get_dev_table(dev, tbl->family);

	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
		if (skip_perm && n->nud_state & NUD_PERMANENT)
			continue;

		hlist_del_rcu(&n->hash);
		hlist_del_rcu(&n->dev_list);
		write_lock(&n->lock);
		neigh_del_timer(n);
		neigh_mark_dead(n);
		if (refcount_read(&n->refcnt) != 1) {
			/* The most unpleasant situation.
			 * We must destroy neighbour entry,
			 * but someone still uses it.
			 *
			 * The destroy will be delayed until
			 * the last user releases us, but
			 * we must kill timers etc. and move
			 * it to safe state.
			 */
			__skb_queue_purge(&n->arp_queue);
			n->arp_queue_len_bytes = 0;
			WRITE_ONCE(n->output, neigh_blackhole);
			if (n->nud_state & NUD_VALID)
				n->nud_state = NUD_NOARP;
			else
				n->nud_state = NUD_NONE;
			neigh_dbg(2, "neigh %p is stray\n", n);
		}
		write_unlock(&n->lock);
		neigh_cleanup_and_release(n);
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);
	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
			   tbl->family);
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		del_timer_sync(&tbl->proxy_timer);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries, gc_thresh3;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
	if (entries >= gc_thresh3 ||
	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->flags	  = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct hlist_head);
	struct hlist_head *hash_heads;
	struct neigh_hash_table *ret;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;

	hash_heads = kvzalloc(size, GFP_ATOMIC);
	if (!hash_heads) {
		kfree(ret);
		return NULL;
	}
	ret->hash_heads = hash_heads;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);

	kvfree(nht->hash_heads);
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct hlist_node *tmp;
		struct neighbour *n;

		neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);

			hlist_del_rcu(&n->hash);
			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
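
/* Editorial sketch (not part of the original file): a typical lookup/release
 * pair as protocol code uses it. arp_tbl is the IPv4 table registered by
 * arp_init(); dst_ip, dev and use_entry() are hypothetical.
 */
#if 0
	struct neighbour *n;

	n = neigh_lookup(&arp_tbl, &dst_ip, dev);
	if (n) {
		/* We hold a reference; the entry cannot be freed under us. */
		use_entry(n);		/* hypothetical helper */
		neigh_release(n);
	}
#endif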
static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u32 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
	if (n->flags & NTF_MANAGED)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
	if (want_ref)
		neigh_hold(n);
	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);

	hlist_add_head_rcu(&n->dev_list,
			   neigh_get_dev_table(dev, tbl->family));

	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);

	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
				     struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		netdev_put(dev, &n->dev_tracker);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			netdev_put(n->dev, &n->dev_tracker);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		netdev_put(n->dev, &n->dev_tracker);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	netdev_put(dev, &neigh->dev_tracker);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->output);
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}
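
/* Editorial note: the two helpers above implement the fast-path switch.
 * While connected, neigh->output points at ops->connected_output (e.g.
 * neigh_connected_output() below) and packets are sent without
 * re-validation; once suspected, it points back at ops->output (e.g.
 * neigh_resolve_output()), which re-runs neigh_event_send() before
 * transmitting. Callers simply dispatch through neigh_output(n, skb, ...),
 * which follows n->output.
 */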
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neigh_hash_table *nht;
	struct hlist_node *tmp;
	struct neighbour *n;
	unsigned int i;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		WRITE_ONCE(tbl->last_rand, jiffies);
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				continue;
			}

			if (time_before(n->used, n->confirmed) &&
			    time_is_before_eq_jiffies(n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate place. error_report() is a complicated
	   routine; in particular, it can hit the same neighbour entry!

	   So we try to be careful and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		WRITE_ONCE(neigh->nud_state, NUD_FAILED);
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
		       const bool immediate_ok)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
			neigh->updated = now;
			if (!immediate_ok) {
				next = now + 1;
			} else {
				immediate_probe = true;
				next = now + max(NEIGH_VAR(neigh->parms,
							   RETRANS_TIME),
						 HZ / 100);
			}
			neigh_add_timer(neigh, next);
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
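
/* Editorial note: callers normally reach this through the
 * neigh_event_send()/neigh_event_send_probe() inlines in <net/neighbour.h>,
 * which refresh neigh->used and skip the call entirely while the entry is
 * already in NUD_CONNECTED, NUD_DELAY or NUD_PROBE state.
 */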
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool gc_update = false, managed_update = false;
	int update_isrouter = 0;
	struct net_device *dev;
	int err, notify = 0;
	u8 old;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
		new = old & ~NUD_PERMANENT;
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		WRITE_ONCE(neigh->nud_state, new);
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			READ_ONCE(n1->output)(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);
	if (((new ^ old) & NUD_PERMANENT) || gc_update)
		neigh_update_gc_list(neigh);
	if (managed_update)
		neigh_update_managed_list(neigh);
	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);
	trace_neigh_update_done(neigh, err);
	return err;
}
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
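
/* Editorial sketch (not part of the original file): an administrative update
 * in the style rtnetlink issues — install a new link-layer address and force
 * the entry reachable. "ha" is a hypothetical buffer of dev->addr_len bytes.
 */
#if 0
	err = neigh_update(n, ha, NUD_REACHABLE,
			   NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0);
#endif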
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
static void neigh_managed_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table,
					       managed_work.work);
	struct neighbour *neigh;

	write_lock_bh(&tbl->lock);
	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
		neigh_event_send_probe(neigh, NULL, false);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
	write_unlock_bh(&tbl->lock);
}

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			neigh_parms_qlen_dec(dev, tbl->family);
			__skb_unlink(skb, &tbl->proxy_queue);

			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

static unsigned long neigh_proxy_delay(struct neigh_parms *p)
{
	/* If proxy_delay is zero, do not call get_random_u32_below()
	 * as it is undefined behavior.
	 */
	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);

	return proxy_delay ?
	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = neigh_proxy_delay(p);

	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	p->qlen++;
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		p->qlen = 0;
		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			netdev_put(dev, &p->dev_tracker);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	netdev_put(parms->dev, &parms->dev_tracker);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	INIT_LIST_HEAD(&tbl->managed_list);

	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
	tbl->parms.qlen = 0;

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);

	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);

	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);
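
/* Editorial note: protocols register their tables with this at init time;
 * IPv4 ARP, for instance, effectively does
 *
 *	neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
 *
 * from arp_init().
 */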
/*
 * Only called from ndisc_cleanup(), which means this is dead code
 * because we no longer can unload IPv6 module.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	RCU_INIT_POINTER(neigh_tables[index], NULL);
	synchronize_rcu();

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->managed_work);
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
		break;
	case AF_INET6:
		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
		break;
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	u32 ndm_flags;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	ndm_flags = ndm->ndm_flags;
	if (tb[NDA_FLAGS_EXT]) {
		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);

		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
			      hweight32(NTF_EXT_MASK)));
		ndm_flags |= (ext << NTF_EXT_SHIFT);
	}
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		if (ndm_flags & NTF_MANAGED) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
			goto out;
		}

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
		bool exempt_from_gc = ndm_permanent ||
				      ndm_flags & NTF_EXT_LEARNED;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}
		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
			err = -EINVAL;
			goto out;
		}

		neigh = ___neigh_create(tbl, dst, dev,
					ndm_flags &
					(NTF_EXT_LEARNED | NTF_MANAGED),
					exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;
	if (ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
	if (ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;
	if (ndm_flags & NTF_MANAGED)
		flags |= NEIGH_UPDATE_F_MANAGED;
	if (ndm_flags & NTF_USE)
		flags |= NEIGH_UPDATE_F_USE;

	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
			     NETLINK_CB(skb).portid, extack);
	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
		neigh_event_send(neigh, NULL);
		err = 0;
	}
	neigh_release(neigh);
out:
	return err;
}
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
			  NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		long flush_delta = now - READ_ONCE(tbl->last_flush);
		long rand_delta = now - READ_ONCE(tbl->last_rand);
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
		};

		rcu_read_lock();
		nht = rcu_dereference(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
			ndst.ndts_hits			+= READ_ONCE(st->hits);
			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
};
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_INTERVAL_PROBE_TIME_MS:
				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));

	if (tb[NDTA_THRESH2])
		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));

	if (tb[NDTA_THRESH3])
		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));

	if (tb[NDTA_GC_INTERVAL])
		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}

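/* A minimal sketch of what each NEIGH_VAR_SET() above expands to (per
 * include/net/neighbour.h; shown here for orientation only):
 *
 *	set_bit(NEIGH_VAR_QUEUE_LEN_BYTES, p->data_state);
 *	p->data[NEIGH_VAR_QUEUE_LEN_BYTES] = val;
 *
 * The data_state bit records that the value was set explicitly, which is
 * why neigh_copy_dflt_parms() further below will not overwrite it when the
 * defaults change.
 */
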
static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}

static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	u32 neigh_flags, neigh_flags_ext;
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
	neigh_flags     = neigh->flags & NTF_OLD_MASK;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh_flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;
	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	u32 neigh_flags, neigh_flags_ext;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
	neigh_flags     = pn->flags & NTF_OLD_MASK;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
		goto nla_put_failure;
	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}

static bool neigh_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *master;

	if (!master_idx)
		return false;

	master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;

	/* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
	 * invalid value for ifindex to denote "no master".
	 */
	if (master_idx == -1)
		return !!master;

	if (!master || master->ifindex != master_idx)
		return true;

	return false;
}

static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
	if (filter_idx && (!dev || dev->ifindex != filter_idx))
		return true;

	return false;
}

struct neigh_dump_filter {
	int master_idx;
	int dev_idx;
};

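/* Filter semantics for RTM_GETNEIGH dumps, illustrated with hypothetical
 * device names: dev_idx/master_idx of 0 mean "no filter" (the attribute was
 * not passed), while a master_idx of -1 keeps only entries whose device has
 * no master at all. E.g.:
 *
 *	ip neigh show dev eth0     ->  filter.dev_idx    = ifindex of eth0
 *	ip neigh show master br0   ->  filter.master_idx = ifindex of br0
 */
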
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int err = 0, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	nht = rcu_dereference(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		idx = 0;
		neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNEIGH, flags);
			if (err < 0)
				goto out;
next:
			idx++;
		}
	}
out:
	cb->args[1] = h;
	cb->args[2] = idx;
	return err;
}

static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int err = 0, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					       cb->nlh->nlmsg_seq,
					       RTM_NEWNEIGH, flags, tbl);
			if (err < 0) {
				read_unlock_bh(&tbl->lock);
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return err;
}

static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	struct ndmsg *ndm;
	int err, i;

	if (strict_check) {
		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		ndm = nlmsg_data(nlh);
		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
		    ndm->ndm_state || ndm->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
						    tb, NDA_MAX, nda_policy,
						    extack);
	} else {
		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
					     NDA_MAX, nda_policy, extack);
	}
	if (err < 0)
		return err;

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		/* all new attributes should require strict_check */
		switch (i) {
		case NDA_IFINDEX:
			filter->dev_idx = nla_get_u32(tb[i]);
			break;
		case NDA_MASTER:
			filter->master_idx = nla_get_u32(tb[i]);
			break;
		default:
			if (strict_check) {
				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;
	err = 0;

	s_t = cb->args[0];

	rcu_read_lock();
	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = rcu_dereference(neigh_tables[t]);

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}
	rcu_read_unlock();

	cb->args[0] = t;
	return err;
}

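/* A dump request whose ndmsg carries NTF_PROXY walks the proxy tables
 * instead of the neighbour cache proper; from userspace this is roughly
 * (illustrative):
 *
 *	ip neigh show proxy
 *
 * which is also why pneigh_dump_table() and neigh_dump_table() share the
 * cb->args[] cursor layout but use different slots ([3]/[4] vs [1]/[2]).
 */
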
static int neigh_valid_get_req(const struct nlmsghdr *nlh,
			       struct neigh_table **tbl,
			       void **dst, int *dev_idx, u8 *ndm_flags,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	struct ndmsg *ndm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
		return -EINVAL;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
	    ndm->ndm_type) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
		return -EINVAL;
	}

	if (ndm->ndm_flags & ~NTF_PROXY) {
		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
					    NDA_MAX, nda_policy, extack);
	if (err < 0)
		return err;

	*ndm_flags = ndm->ndm_flags;
	*dev_idx = ndm->ndm_ifindex;
	*tbl = neigh_find_table(ndm->ndm_family);
	if (*tbl == NULL) {
		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
		return -EAFNOSUPPORT;
	}

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NDA_DST:
			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
				return -EINVAL;
			}
			*dst = nla_data(tb[i]);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
			return -EINVAL;
		}
	}

	return 0;
}

static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4)  /* NDA_PROBES */
	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

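/* Worst-case sizing sketch: with MAX_ADDR_LEN == 32, each address attribute
 * reserves nla_total_size(32) == NLA_ALIGN(NLA_HDRLEN + 32) == 36 bytes, so
 * allocations based on this size cover any dev->addr_len without per-device
 * arithmetic. (Numbers assume the current MAX_ADDR_LEN/NLA_HDRLEN values.)
 */
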
static int neigh_get_reply(struct net *net, struct neighbour *neigh,
			   u32 pid, u32 seq)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static inline size_t pneigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
			    u32 pid, u32 seq, struct neigh_table *tbl)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct net_device *dev = NULL;
	struct neigh_table *tbl = NULL;
	struct neighbour *neigh;
	void *dst = NULL;
	u8 ndm_flags = 0;
	int dev_idx = 0;
	int err;

	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
				  extack);
	if (err < 0)
		return err;

	if (dev_idx) {
		dev = __dev_get_by_index(net, dev_idx);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
			return -ENODEV;
		}
	}

	if (!dst) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		return -EINVAL;
	}

	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		pn = pneigh_lookup(tbl, net, dst, dev, 0);
		if (!pn) {
			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
			return -ENOENT;
		}
		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, tbl);
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "No device specified");
		return -EINVAL;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (!neigh) {
		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
		return -ENOENT;
	}

	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
			      nlh->nlmsg_seq);

	neigh_release(neigh);

	return err;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock();
	nht = rcu_dereference(tbl->nht);

	read_lock_bh(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
			cb(n, cookie);
	}
	read_unlock_bh(&tbl->lock);
	rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	struct neigh_hash_table *nht;
	int chain;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct hlist_node *tmp;
		struct neighbour *n;

		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
			}
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		rcu_read_lock();
		tbl = rcu_dereference(neigh_tables[index]);
		if (!tbl) {
			rcu_read_unlock();
			goto out_kfree_skb;
		}
		if (index == NEIGH_ARP_TABLE) {
			u32 key = *((u32 *)addr);

			neigh = __ipv4_neigh_lookup_noref(dev, key);
		} else {
			neigh = __neigh_lookup_noref(tbl, addr, dev);
		}
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock();
			goto out_kfree_skb;
		}
		err = READ_ONCE(neigh->output)(neigh, skb);
		rcu_read_unlock();
	}
	else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_valid(struct seq_file *seq,
					 struct neighbour *n,
					 loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);

	if (!net_eq(dev_net(n->dev), net))
		return NULL;

	if (state->neigh_sub_iter) {
		loff_t fakep = 0;
		void *v;

		v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
		if (!v)
			return NULL;
		if (pos)
			return v;
	}

	if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
		return n;

	if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
		return n;

	return NULL;
}

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n, *tmp;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;

	while (++state->bucket < (1 << nht->hash_shift)) {
		neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
			tmp = neigh_get_valid(seq, n, NULL);
			if (tmp)
				return tmp;
		}
	}

	return NULL;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct neighbour *tmp;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);

		if (v)
			return n;
	}

	hlist_for_each_entry_continue(n, hash) {
		tmp = neigh_get_valid(seq, n, pos);
		if (tmp) {
			n = tmp;
			goto out;
		}
	}

	n = neigh_get_first(seq);
out:
	if (n && pos)
		--(*pos);

	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = -1;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock();
	state->nht = rcu_dereference(tbl->nht);
	read_lock_bh(&tbl->lock);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(tbl->lock)
	__releases(rcu)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	read_unlock_bh(&tbl->lock);
	rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	(*pos)++;
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx "
			"%08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}

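/* These seq ops back the per-table statistics files (typically
 * /proc/net/stat/arp_cache and /proc/net/stat/ndisc_cache): one header
 * line, then one row of counters per possible CPU.
 */
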
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
#endif /* CONFIG_PROC_FS */

static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);

#ifdef CONFIG_SYSCTL
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

static int proc_unres_qlen(const struct ctl_table *ctl, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

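/* Conversion sketch: the legacy "unres_qlen" knob is expressed in packets
 * but stored in bytes. Writing N stores N * SKB_TRUESIZE(ETH_FRAME_LEN),
 * and a later read divides by the same factor, so rounding toward whole
 * packets is expected when "unres_qlen_bytes" was set directly.
 */
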
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(const struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = SYSCTL_INT_MAX;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
						   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;
	int min = msecs_to_jiffies(1);

	tmp.extra1 = &min;
	tmp.extra2 = NULL;

	ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
			size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
					      void *buffer, size_t *lenp,
					      loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}

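/* Example (hypothetical interface name): writing either form refreshes the
 * effective timer immediately:
 *
 *	echo 30000 > /proc/sys/net/ipv4/neigh/eth0/base_reachable_time_ms
 *
 * Without the p->reachable_time update above, the new base would only be
 * picked up on the next periodic recomputation, which can be minutes away.
 */
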
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)

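/* NEIGH_PARMS_DATA_OFFSET() deliberately stores an offset from a NULL
 * neigh_parms rather than a real pointer: neigh_sysctl_register() below
 * rebases each entry with "t->neigh_vars[i].data += (long) p", so one
 * static template can serve every device's parms instance.
 */
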
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
						       "interval_probe_time_ms"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
	},
};

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;
	size_t neigh_vars_size;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
						  neigh_path, t->neigh_vars,
						  neigh_vars_size);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif	/* CONFIG_SYSCTL */

static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
	{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
	{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
	 .flags = RTNL_FLAG_DUMP_UNLOCKED},
	{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
	{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
};

static int __init neigh_init(void)
{
	rtnl_register_many(neigh_rtnl_msg_handlers);

	return 0;
}

subsys_initcall(neigh_init);