/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK0 NEIGH_PRINTK
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif
static void neigh_timer_handler(unsigned long arg);
static void neigh_app_notify(struct neighbour *n);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static int neigh_glbl_allocs;
static struct neigh_table *neigh_tables;
#if defined(__i386__) && defined(__SMP__)
#define ASSERT_WL(n) if ((int)((n)->lock.lock) >= 0) { printk("WL assertion failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); }
#else
#define ASSERT_WL(n) do { } while(0)
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
     - timer.
     - resolution queue.

   Again, nothing clever shall be made under neigh->lock;
   the most complicated procedure, which we allow, is dev->hard_header.
   It is supposed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
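/*
   A minimal sketch of that discipline (illustrative only, hence #if 0;
   interesting() and do_slow_work() are hypothetical helpers, not part of
   this file): pin the entry with a reference under tbl->lock, drop the
   lock, and only then do anything non-trivial.
 */
#if 0
static void example_scan(struct neigh_table *tbl, unsigned int hash)
{
	struct neighbour *n, *found = NULL;

	read_lock_bh(&tbl->lock);
	for (n = tbl->hash_buckets[hash]; n; n = n->next) {
		if (interesting(n)) {
			atomic_inc(&n->refcnt);	/* pin before unlocking */
			found = n;
			break;
		}
	}
	read_unlock_bh(&tbl->lock);

	if (found) {
		do_slow_work(found);	/* may talk to backend/driver now */
		neigh_release(found);	/* drop the pin */
	}
}
#endif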
static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED;
static int neigh_blackhole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return (net_random() % base) + (base>>1);
}
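/*
 * Worked example (not in the original source): for base = 30*HZ the
 * expression yields net_random()%(30*HZ) + 15*HZ, i.e. a value uniformly
 * distributed over [15*HZ, 45*HZ) -- exactly the (1/2)*base...(3/2)*base
 * interval described above.
 */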
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;

	for (i=0; i<=NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		write_lock_bh(&tbl->lock);
		while ((n = *np) != NULL) {
			/* Neighbour record may be discarded if:
			   - nobody refers to it.
			   - it is not permanent
			   - (NEW and probably wrong)
			     INCOMPLETE entries are kept at least for
			     n->parms->retrans_time, otherwise we could
			     flood network with resolution requests.
			     It is not clear, what is better: table overflow
			     or flooding.
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state&NUD_PERMANENT) &&
			    (n->nud_state != NUD_INCOMPLETE ||
			     jiffies - n->used > n->parms->retrans_time)) {
				*np = n->next;
				n->dead = 1;
				shrunk = 1;
				write_unlock(&n->lock);
				neigh_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
		write_unlock_bh(&tbl->lock);
	}

	tbl->last_flush = jiffies;
	return shrunk;
}
static int neigh_del_timer(struct neighbour *n)
{
	if (n->nud_state & NUD_IN_TIMER) {
		if (del_timer(&n->timer)) {
			neigh_release(n);
			return 1;
		}
	}
	return 0;
}
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	int i;

	write_lock_bh(&tbl->lock);

	for (i=0; i<=NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		while ((n = *np) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			*np = n->next;
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				n->parms = &tbl->parms;
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state&NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_release(n);
		}
	}

	del_timer(&tbl->proxy_timer);
	skb_queue_purge(&tbl->proxy_queue);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);
	return 0;
}
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n;
	unsigned long now = jiffies;

	if (tbl->entries > tbl->gc_thresh3 ||
	    (tbl->entries > tbl->gc_thresh2 &&
	     now - tbl->last_flush > 5*HZ)) {
		if (neigh_forced_gc(tbl) == 0 &&
		    tbl->entries > tbl->gc_thresh3)
			return NULL;
	}

	n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
	if (n == NULL)
		return NULL;

	memset(n, 0, tbl->entry_size);

	skb_queue_head_init(&n->arp_queue);
	n->lock = RW_LOCK_UNLOCKED;
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	n->parms = &tbl->parms;
	init_timer(&n->timer);
	n->timer.function = neigh_timer_handler;
	n->timer.data = (unsigned long)n;
	tbl->stats.allocs++;
	neigh_glbl_allocs++;
	tbl->entries++;
	n->tbl = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead = 1;
	return n;
}
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	u32 hash_val;
	int key_len = tbl->key_len;

	hash_val = tbl->hash(pkey, dev);

	read_lock_bh(&tbl->lock);
	for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
		if (dev == n->dev &&
		    memcmp(n->primary_key, pkey, key_len) == 0) {
			atomic_inc(&n->refcnt);	/* hand back a reference */
			break;
		}
	}
	read_unlock_bh(&tbl->lock);
	return n;
}
struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey,
				struct net_device *dev)
{
	struct neighbour *n, *n1;
	u32 hash_val;
	int key_len = tbl->key_len;

	n = neigh_alloc(tbl);
	if (n == NULL)
		return NULL;

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && tbl->constructor(n) < 0) {
		neigh_release(n);
		return NULL;
	}

	/* Device specific setup. */
	if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) {
		neigh_release(n);
		return NULL;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time<<1);

	hash_val = tbl->hash(pkey, dev);

	write_lock_bh(&tbl->lock);
	for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
		if (dev == n1->dev &&
		    memcmp(n1->primary_key, pkey, key_len) == 0) {
			atomic_inc(&n1->refcnt);
			write_unlock_bh(&tbl->lock);
			neigh_release(n);
			return n1;
		}
	}

	n->next = tbl->hash_buckets[hash_val];
	tbl->hash_buckets[hash_val] = n;
	n->dead = 0;
	atomic_inc(&n->refcnt);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	return n;
}
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	u32 hash_val;
	int key_len = tbl->key_len;

	/* Fold the last 32 bits of the key down to a PNEIGH_HASHMASK
	   sized bucket index. */
	hash_val = *(u32*)(pkey + key_len - 4);
	hash_val ^= (hash_val>>16);
	hash_val ^= hash_val>>8;
	hash_val ^= hash_val>>4;
	hash_val &= PNEIGH_HASHMASK;

	read_lock_bh(&tbl->lock);

	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
		if (memcmp(n->key, pkey, key_len) == 0 &&
		    (n->dev == dev || !n->dev)) {
			read_unlock_bh(&tbl->lock);
			return n;
		}
	}
	read_unlock_bh(&tbl->lock);
	if (!creat)
		return NULL;

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (n == NULL)
		return NULL;

	memcpy(n->key, pkey, key_len);
	n->dev = dev;

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		kfree(n);
		return NULL;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
	return n;
}
int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 hash_val;
	int key_len = tbl->key_len;

	hash_val = *(u32*)(pkey + key_len - 4);
	hash_val ^= (hash_val>>16);
	hash_val ^= hash_val>>8;
	hash_val ^= hash_val>>4;
	hash_val &= PNEIGH_HASHMASK;

	for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
		if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
			write_lock_bh(&tbl->lock);
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			kfree(n);
			return 0;
		}
	}
	return -ENOENT;
}
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h=0; h<=PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n=*np) != NULL) {
			if (n->dev == dev || dev == NULL) {
				/* Unlink without advancing np: n is freed
				   below, so we must not step through it. */
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct hh_cache *hh;

	if (!neigh->dead) {
		printk("Destroying alive neighbour %p from %08lx\n", neigh,
		       *(((unsigned long*)&neigh)-1));
		return;
	}

	if (neigh_del_timer(neigh))
		printk("Impossible event.\n");

	while ((hh = neigh->hh) != NULL) {
		neigh->hh = hh->hh_next;
		hh->hh_next = NULL;
		write_lock_bh(&hh->hh_lock);
		hh->hh_output = neigh_blackhole;
		write_unlock_bh(&hh->hh_lock);
		if (atomic_dec_and_test(&hh->hh_refcnt))
			kfree(hh);
	}

	if (neigh->ops && neigh->ops->destructor)
		(neigh->ops->destructor)(neigh);

	skb_queue_purge(&neigh->arp_queue);

	dev_put(neigh->dev);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	neigh_glbl_allocs--;
	neigh->tbl->entries--;
	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	ASSERT_WL(neigh);

	neigh->output = neigh->ops->output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->output;
}
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	ASSERT_WL(neigh);

	neigh->output = neigh->ops->connected_output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->hh_output;
}
/*
   Transitions NUD_STALE <-> NUD_REACHABLE do not occur
   when fast path is built: we have no timers associated with
   these states, we do not have time to check state when sending.
   neigh_periodic_timer periodically checks neigh->confirmed
   time and moves NUD_REACHABLE -> NUD_STALE.

   If a routine wants to know the TRUE entry state, it calls
   neigh_sync before checking state.

   Called with write_locked neigh.
 */

static void neigh_sync(struct neighbour *n)
{
	unsigned long now = jiffies;
	u8 state = n->nud_state;

	ASSERT_WL(n);

	if (state&(NUD_NOARP|NUD_PERMANENT))
		return;
	if (state&NUD_REACHABLE) {
		if (now - n->confirmed > n->parms->reachable_time) {
			n->nud_state = NUD_STALE;
			neigh_suspect(n);
		}
	} else if (state&NUD_VALID) {
		if (now - n->confirmed < n->parms->reachable_time) {
			neigh_del_timer(n);
			n->nud_state = NUD_REACHABLE;
			neigh_connect(n);
		}
	}
}
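/*
   A usage sketch (illustrative only, hence #if 0; example_is_reachable()
   is a hypothetical caller): fold in the timer-less REACHABLE/STALE drift
   before trusting nud_state, as the comment above prescribes.
 */
#if 0
static int example_is_reachable(struct neighbour *n)
{
	int reachable;

	write_lock_bh(&n->lock);
	neigh_sync(n);		/* may move REACHABLE <-> STALE */
	reachable = (n->nud_state & NUD_REACHABLE) != 0;
	write_unlock_bh(&n->lock);
	return reachable;
}
#endif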
static void neigh_periodic_timer(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table*)arg;
	unsigned long now = jiffies;
	int i;

	write_lock(&tbl->lock);

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (now - tbl->last_rand > 300*HZ) {
		struct neigh_parms *p;
		tbl->last_rand = now;
		for (p=&tbl->parms; p; p = p->next)
			p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i=0; i <= NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		while ((n = *np) != NULL) {
			unsigned state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state&(NUD_PERMANENT|NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if ((long)(n->used - n->confirmed) < 0)
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_release(n);
				continue;
			}

			if (n->nud_state&NUD_REACHABLE &&
			    now - n->confirmed > n->parms->reachable_time) {
				n->nud_state = NUD_STALE;
				neigh_suspect(n);
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
	}

	tbl->gc_timer.expires = now + tbl->gc_interval;
	add_timer(&tbl->gc_timer);
	write_unlock(&tbl->lock);
}
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return p->ucast_probes + p->app_probes + p->mcast_probes;
}
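/*
 * Worked example (not in the original source): with typical defaults of
 * ucast_probes=3, app_probes=0 and mcast_probes=3 this returns 6. Since
 * __neigh_event_send() below starts the probes counter at ucast_probes,
 * an INCOMPLETE entry sends at most app_probes+mcast_probes solicitations
 * before neigh_timer_handler() declares it NUD_FAILED.
 */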
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now = jiffies;
	struct neighbour *neigh = (struct neighbour*)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;

	if (!(state&NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
		printk("neigh: timer & !nud_in_timer\n");
#endif
		goto out;
	}

	if ((state&NUD_VALID) &&
	    now - neigh->confirmed < neigh->parms->reachable_time) {
		neigh->nud_state = NUD_REACHABLE;
		NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
		neigh_connect(neigh);
		goto out;
	}
	if (state == NUD_DELAY) {
		NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
		neigh->nud_state = NUD_PROBE;
		atomic_set(&neigh->probes, 0);
	}

	if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		struct sk_buff *skb;

		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh->tbl->stats.res_failed++;
		NEIGH_PRINTK2("neigh %p is failed.\n", neigh);

		/* This is a very thin place. report_unreachable is a very
		   complicated routine. Particularly, it can hit the same
		   neighbour entry!

		   So we try to be accurate and avoid a dead loop. --ANK
		 */
		while (neigh->nud_state==NUD_FAILED &&
		       (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
			write_unlock(&neigh->lock);
			neigh->ops->error_report(neigh, skb);
			write_lock(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
		goto out;
	}

	neigh->timer.expires = now + neigh->parms->retrans_time;
	add_timer(&neigh->timer);
	write_unlock(&neigh->lock);

	neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
	atomic_inc(&neigh->probes);
	return;

out:
	write_unlock(&neigh->lock);
#ifdef CONFIG_ARPD
	if (notify && neigh->parms->app_probes)
		neigh_app_notify(neigh);
#endif
	neigh_release(neigh);
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
		if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
			if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
				atomic_set(&neigh->probes, neigh->parms->ucast_probes);
				neigh->nud_state = NUD_INCOMPLETE;
				atomic_inc(&neigh->refcnt);
				neigh->timer.expires = jiffies + neigh->parms->retrans_time;
				add_timer(&neigh->timer);
				write_unlock_bh(&neigh->lock);
				neigh->ops->solicit(neigh, skb);
				atomic_inc(&neigh->probes);
				write_lock_bh(&neigh->lock);
			} else {
				neigh->nud_state = NUD_FAILED;
				write_unlock_bh(&neigh->lock);

				if (skb)
					kfree_skb(skb);
				return 1;
			}
		}
		if (neigh->nud_state == NUD_INCOMPLETE) {
			if (skb) {
				if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
					struct sk_buff *buff;
					buff = neigh->arp_queue.prev;
					__skb_unlink(buff, &neigh->arp_queue);
					kfree_skb(buff);
				}
				__skb_queue_head(&neigh->arp_queue, skb);
			}
			write_unlock_bh(&neigh->lock);
			return 1;
		}
		if (neigh->nud_state == NUD_STALE) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			atomic_inc(&neigh->refcnt);
			neigh->nud_state = NUD_DELAY;
			neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
			add_timer(&neigh->timer);
		}
	}
	write_unlock_bh(&neigh->lock);
	return 0;
}
static __inline__ void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, struct net_device*, unsigned char*) =
		neigh->dev->header_cache_update;

	if (update) {
		for (hh=neigh->hh; hh; hh=hh->hh_next) {
			write_lock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_unlock_bh(&hh->hh_lock);
		}
	}
}
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- override==1 allows overriding an existing lladdr, if it is different.
   -- arp==0 means that the change is administrative.

   Caller MUST hold a reference count on the entry.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, int override, int arp)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev = neigh->dev;

	write_lock_bh(&neigh->lock);
	old = neigh->nud_state;

	err = -EPERM;
	if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
		goto out;

	if (!(new&NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old&NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old&NUD_VALID;
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (dev->addr_len == 0) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if (old&NUD_VALID) {
			if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
				lladdr = neigh->ha;
			else if (!override)
				goto out;
		}
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old&NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	neigh_sync(neigh);
	old = neigh->nud_state;
	if (new&NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	if (old&NUD_VALID) {
		if (lladdr == neigh->ha)
			if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
				goto out;
	}
	neigh_del_timer(neigh);
	neigh->nud_state = new;
	if (lladdr != neigh->ha) {
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		neigh_update_hhs(neigh);
		neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
#ifdef CONFIG_ARPD
		notify = 1;
#endif
	}
	if (new == old)
		goto out;
	if (new&NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old&NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state&NUD_VALID &&
		       (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct neighbour *n1 = neigh;
			write_unlock_bh(&neigh->lock);
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (skb->dst && skb->dst->neighbour)
				n1 = skb->dst->neighbour;
			n1->output(n1, skb);
			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
	}
out:
	write_unlock_bh(&neigh->lock);
#ifdef CONFIG_ARPD
	if (notify && neigh->parms->app_probes)
		neigh_app_notify(neigh);
#endif
	return err;
}
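/*
   A usage sketch (illustrative only, hence #if 0): an administrative
   update in the style of the RTNETLINK code below -- the caller holds a
   reference across the call, as required by the comment above
   neigh_update(). example_admin_fail() is a hypothetical helper.
 */
#if 0
static int example_admin_fail(struct neigh_table *tbl, const void *key,
			      struct net_device *dev)
{
	struct neighbour *n = neigh_lookup(tbl, key, dev);	/* takes a ref */
	int err = -ENOENT;

	if (n) {
		/* lladdr=NULL, new=NUD_FAILED, override=1, arp=0 (admin) */
		err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
		neigh_release(n);
	}
	return err;
}
#endif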
struct neighbour * neigh_event_ns(struct neigh_table *tbl,
				  u8 *lladdr, void *saddr,
				  struct net_device *dev)
{
	struct neighbour *neigh;

	neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
	return neigh;
}
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
{
	struct hh_cache *hh = NULL;
	struct net_device *dev = dst->dev;

	for (hh=n->hh; hh; hh = hh->hh_next)
		if (hh->hh_type == protocol)
			break;

	if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
		memset(hh, 0, sizeof(struct hh_cache));
		hh->hh_lock = RW_LOCK_UNLOCKED;
		hh->hh_type = protocol;
		atomic_set(&hh->hh_refcnt, 0);
		hh->hh_next = NULL;
		if (dev->hard_header_cache(n, hh)) {
			kfree(hh);
			hh = NULL;
		} else {
			atomic_inc(&hh->hh_refcnt);
			hh->hh_next = n->hh;
			n->hh = hh;
			if (n->nud_state&NUD_CONNECTED)
				hh->hh_output = n->ops->hh_output;
			else
				hh->hh_output = n->ops->output;
		}
	}
	if (hh) {
		atomic_inc(&hh->hh_refcnt);
		dst->hh = hh;
	}
}
/* This function can be used in contexts, where only old dev_queue_xmit
   worked, e.g. if you want to override the normal output path (eql, shaper),
   but resolution is not made yet.
 */

int neigh_compat_output(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	__skb_pull(skb, skb->nh.raw - skb->data);

	if (dev->hard_header &&
	    dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 &&
	    dev->rebuild_header(skb))
		return 0;

	return dev_queue_xmit(skb);
}
/* Slow and careful. */

int neigh_resolve_output(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct neighbour *neigh;

	if (!dst || !(neigh = dst->neighbour))
		goto discard;

	__skb_pull(skb, skb->nh.raw - skb->data);

	if (neigh_event_send(neigh, skb) == 0) {
		int err;
		struct net_device *dev = neigh->dev;
		if (dev->hard_header_cache && dst->hh == NULL) {
			write_lock_bh(&neigh->lock);
			if (dst->hh == NULL)
				neigh_hh_init(neigh, dst, dst->ops->protocol);
			err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
			write_unlock_bh(&neigh->lock);
		} else {
			read_lock_bh(&neigh->lock);
			err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
			read_unlock_bh(&neigh->lock);
		}
		if (err >= 0)
			return neigh->ops->queue_xmit(skb);
		kfree_skb(skb);
		return -EINVAL;
	}
	return 0;

discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
	kfree_skb(skb);
	return -EINVAL;
}
/* As fast as possible without hh cache */

int neigh_connected_output(struct sk_buff *skb)
{
	int err;
	struct dst_entry *dst = skb->dst;
	struct neighbour *neigh = dst->neighbour;
	struct net_device *dev = neigh->dev;

	__skb_pull(skb, skb->nh.raw - skb->data);

	read_lock_bh(&neigh->lock);
	err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
	read_unlock_bh(&neigh->lock);
	if (err >= 0)
		return neigh->ops->queue_xmit(skb);
	kfree_skb(skb);
	return -EINVAL;
}
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb = tbl->proxy_queue.next;

	while (skb != (struct sk_buff*)&tbl->proxy_queue) {
		struct sk_buff *back = skb;
		/* skb->stamp.tv_usec holds the target dispatch time in
		   jiffies; see pneigh_enqueue below. */
		long tdif = back->stamp.tv_usec - now;

		skb = skb->next;
		if (tdif <= 0) {
			__skb_unlink(back, &tbl->proxy_queue);
			if (tbl->proxy_redo)
				tbl->proxy_redo(back);
			else
				kfree_skb(back);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next) {
		tbl->proxy_timer.expires = jiffies + sched_next;
		add_timer(&tbl->proxy_timer);
	}
}
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	long sched_next = net_random()%p->proxy_delay;

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}
	/* skb->stamp is reused to carry the dispatch time in jiffies. */
	skb->stamp.tv_sec = 0;
	skb->stamp.tv_usec = now + sched_next;
	if (del_timer(&tbl->proxy_timer)) {
		long tval = tbl->proxy_timer.expires - now;
		if (tval < sched_next)
			sched_next = tval;
	}
	tbl->proxy_timer.expires = now + sched_next;
	dst_release(skb->dst);
	skb->dst = NULL;
	__skb_queue_tail(&tbl->proxy_queue, skb);
	add_timer(&tbl->proxy_timer);
}
struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl)
{
	struct neigh_parms *p;
	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (p) {
		memcpy(p, &tbl->parms, sizeof(*p));

		p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
		if (dev && dev->neigh_setup) {
			if (dev->neigh_setup(dev, p)) {
				kfree(p);
				return NULL;
			}
		}
		write_lock_bh(&tbl->lock);
		p->next = tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (parms == NULL || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			write_unlock_bh(&tbl->lock);
#ifdef CONFIG_SYSCTL
			neigh_sysctl_unregister(parms);
#endif
			kfree(parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
void neigh_table_init(struct neigh_table *tbl)
{
	unsigned long now = jiffies;

	tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);

	if (tbl->kmem_cachep == NULL)
		tbl->kmem_cachep = kmem_cache_create(tbl->id,
						     (tbl->entry_size+15)&~15,
						     0, SLAB_HWCACHE_ALIGN,
						     NULL, NULL);

	init_timer(&tbl->gc_timer);
	tbl->lock = RW_LOCK_UNLOCKED;
	tbl->gc_timer.data = (unsigned long)tbl;
	tbl->gc_timer.function = neigh_periodic_timer;
	tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time;
	add_timer(&tbl->gc_timer);

	init_timer(&tbl->proxy_timer);
	tbl->proxy_timer.data = (unsigned long)tbl;
	tbl->proxy_timer.function = neigh_proxy_process;
	skb_queue_head_init(&tbl->proxy_queue);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time*20;
	write_lock(&neigh_tbl_lock);
	tbl->next = neigh_tables;
	neigh_tables = tbl;
	write_unlock(&neigh_tbl_lock);
}
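/*
   A registration sketch (illustrative only, hence #if 0): a protocol
   declares a static struct neigh_table with at least id, family,
   entry_size, key_len, hash, constructor and default parms filled in
   (arp_tbl in net/ipv4/arp.c is the in-tree example) and registers it
   once at init time. example_tbl is hypothetical.
 */
#if 0
static struct neigh_table example_tbl;	/* fields set up elsewhere */

static void example_proto_init(void)
{
	/* creates the kmem cache, arms the gc and proxy timers and links
	   the table into the global neigh_tables list */
	neigh_table_init(&example_tbl);
}
#endif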
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	del_timer(&tbl->gc_timer);
	del_timer(&tbl->proxy_timer);
	skb_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (tbl->entries)
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);
#ifdef CONFIG_SYSCTL
	neigh_sysctl_unregister(&tbl->parms);
#endif
	return 0;
}
#ifdef CONFIG_RTNETLINK

int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct ndmsg *ndm = NLMSG_DATA(nlh);
	struct rtattr **nda = arg;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = 0;

	if (ndm->ndm_ifindex) {
		if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
			return -ENODEV;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl=neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *n;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		err = -EINVAL;
		if (nda[NDA_DST-1] == NULL ||
		    nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
			goto out;

		if (ndm->ndm_flags&NTF_PROXY) {
			err = pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		err = -ENOENT;
		n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
		if (n) {
			err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
			neigh_release(n);
		}
out:
		if (dev)
			dev_put(dev);
		return err;
	}
	read_unlock(&neigh_tbl_lock);

	if (dev)
		dev_put(dev);
	return -EADDRNOTAVAIL;
}
int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct ndmsg *ndm = NLMSG_DATA(nlh);
	struct rtattr **nda = arg;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = 0;

	if (ndm->ndm_ifindex) {
		if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
			return -ENODEV;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl=neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *n;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		err = -EINVAL;
		if (nda[NDA_DST-1] == NULL ||
		    nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
			goto out;
		if (ndm->ndm_flags&NTF_PROXY) {
			err = -ENOBUFS;
			if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
				err = 0;
			goto out;
		}
		if (dev == NULL)
			goto out;
		if (nda[NDA_LLADDR-1] != NULL &&
		    nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
			goto out;
		n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
		if (n) {
			if (nlh->nlmsg_flags&NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(n);
				goto out;
			}
		} else if (!(nlh->nlmsg_flags&NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		} else {
			n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1);
			if (n == NULL) {
				err = -ENOBUFS;
				goto out;
			}
		}
		err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
				   ndm->ndm_state,
				   nlh->nlmsg_flags&NLM_F_REPLACE, 0);
		neigh_release(n);
out:
		if (dev)
			dev_put(dev);
		return err;
	}
	read_unlock(&neigh_tbl_lock);

	if (dev)
		dev_put(dev);
	return -EADDRNOTAVAIL;
}
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
			   u32 pid, u32 seq, int event)
{
	unsigned long now = jiffies;
	struct ndmsg *ndm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct nda_cacheinfo ci;
	int locked = 0;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
	ndm = NLMSG_DATA(nlh);
	ndm->ndm_family = n->ops->family;
	ndm->ndm_flags = n->flags;
	ndm->ndm_type = n->type;
	ndm->ndm_ifindex = n->dev->ifindex;
	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
	read_lock_bh(&n->lock);
	locked = 1;
	ndm->ndm_state = n->nud_state;
	if (n->nud_state&NUD_VALID)
		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
	ci.ndm_used = now - n->used;
	ci.ndm_confirmed = now - n->confirmed;
	ci.ndm_updated = now - n->updated;
	ci.ndm_refcnt = atomic_read(&n->refcnt) - 1;
	read_unlock_bh(&n->lock);
	locked = 0;
	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	if (locked)
		read_unlock_bh(&n->lock);
	skb_trim(skb, b - skb->data);
	return -1;
}
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neighbour *n;
	int h, s_h;
	int idx, s_idx;

	s_h = cb->args[1];
	s_idx = idx = cb->args[2];
	for (h=0; h <= NEIGH_HASHMASK; h++) {
		if (h < s_h) continue;
		if (h > s_h)
			s_idx = 0;
		read_lock_bh(&tbl->lock);
		for (n = tbl->hash_buckets[h], idx = 0; n;
		     n = n->next, idx++) {
			if (idx < s_idx)
				continue;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
				read_unlock_bh(&tbl->lock);
				cb->args[1] = h;
				cb->args[2] = idx;
				return -1;
			}
		}
		read_unlock_bh(&tbl->lock);
	}

	cb->args[1] = h;
	cb->args[2] = idx;
	return skb->len;
}
int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	int t;
	int s_t;
	struct neigh_table *tbl;
	int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;

	s_t = cb->args[0];

	read_lock(&neigh_tbl_lock);
	for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
		if (t < s_t) continue;
		if (family && tbl->family != family)
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);

	skb = alloc_skb(size, GFP_ATOMIC);
	if (!skb)
		return;

	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
		kfree_skb(skb);
		return;
	}
	nlh = (struct nlmsghdr*)skb->data;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}
static void neigh_app_notify(struct neighbour *n)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);

	skb = alloc_skb(size, GFP_ATOMIC);
	if (!skb)
		return;

	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
		kfree_skb(skb);
		return;
	}
	nlh = (struct nlmsghdr*)skb->data;
	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}
#endif	/* CONFIG_ARPD */

#endif	/* CONFIG_RTNETLINK */
#ifdef CONFIG_SYSCTL

struct neigh_sysctl_table
{
	struct ctl_table_header *sysctl_header;
	ctl_table neigh_vars[17];
	ctl_table neigh_dev[2];
	ctl_table neigh_neigh_dir[2];
	ctl_table neigh_proto_dir[2];
	ctl_table neigh_root_dir[2];
} neigh_sysctl_template = {
	NULL,
	{{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_UCAST_SOLICIT, "ucast_solicit",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_APP_SOLICIT, "app_solicit",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_RETRANS_TIME, "retrans_time",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_REACHABLE_TIME, "base_reachable_time",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec_jiffies},
	 {NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec_jiffies},
	 {NET_NEIGH_GC_STALE_TIME, "gc_stale_time",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec_jiffies},
	 {NET_NEIGH_UNRES_QLEN, "unres_qlen",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_PROXY_QLEN, "proxy_qlen",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_ANYCAST_DELAY, "anycast_delay",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_PROXY_DELAY, "proxy_delay",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_LOCKTIME, "locktime",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_GC_INTERVAL, "gc_interval",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec_jiffies},
	 {NET_NEIGH_GC_THRESH1, "gc_thresh1",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_GC_THRESH2, "gc_thresh2",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_GC_THRESH3, "gc_thresh3",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {0}},

	{{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}},
	{{0, "neigh", NULL, 0, 0555, NULL},{0}},
	{{0, NULL, NULL, 0, 0555, NULL},{0}},
	{{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
};
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  int p_id, int pdev_id, char *p_name)
{
	struct neigh_sysctl_table *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (t == NULL)
		return -ENOBUFS;
	memcpy(t, &neigh_sysctl_template, sizeof(*t));
	t->neigh_vars[0].data = &p->mcast_probes;
	t->neigh_vars[1].data = &p->ucast_probes;
	t->neigh_vars[2].data = &p->app_probes;
	t->neigh_vars[3].data = &p->retrans_time;
	t->neigh_vars[4].data = &p->base_reachable_time;
	t->neigh_vars[5].data = &p->delay_probe_time;
	t->neigh_vars[6].data = &p->gc_staletime;
	t->neigh_vars[7].data = &p->queue_len;
	t->neigh_vars[8].data = &p->proxy_qlen;
	t->neigh_vars[9].data = &p->anycast_delay;
	t->neigh_vars[10].data = &p->proxy_delay;
	t->neigh_vars[11].data = &p->locktime;
	if (dev) {
		t->neigh_dev[0].procname = dev->name;
		t->neigh_dev[0].ctl_name = dev->ifindex;
		/* Terminate the table early: per-device directories do
		   not carry the gc_* entries. */
		memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
	} else {
		t->neigh_vars[12].data = (int*)(p+1);
		t->neigh_vars[13].data = (int*)(p+1) + 1;
		t->neigh_vars[14].data = (int*)(p+1) + 2;
		t->neigh_vars[15].data = (int*)(p+1) + 3;
	}
	t->neigh_neigh_dir[0].ctl_name = pdev_id;

	t->neigh_proto_dir[0].procname = p_name;
	t->neigh_proto_dir[0].ctl_name = p_id;

	t->neigh_dev[0].child = t->neigh_vars;
	t->neigh_neigh_dir[0].child = t->neigh_dev;
	t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
	t->neigh_root_dir[0].child = t->neigh_proto_dir;

	t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
	if (t->sysctl_header == NULL) {
		kfree(t);
		return -ENOBUFS;
	}
	p->sysctl_table = t;
	return 0;
}
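/*
   A usage sketch (illustrative only, hence #if 0; the ids mirror what
   net/ipv4/arp.c passes at init time, on the assumption of the
   2.2/2.4-era API): registering the default ARP parms creates the
   entries under /proc/sys/net/ipv4/neigh/.
 */
#if 0
	/* hypothetical call, as from arp_init() */
	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
			      NET_IPV4_NEIGH, "ipv4");
#endif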
void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}

#endif	/* CONFIG_SYSCTL */